1 //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation specialized to NVPTX 11 // targets. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 16 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 17 18 #include "CGOpenMPRuntime.h" 19 #include "CodeGenFunction.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "llvm/IR/CallSite.h" 22 23 namespace clang { 24 namespace CodeGen { 25 26 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { 27 public: 28 /// Defines the execution mode. 29 enum ExecutionMode { 30 /// SPMD execution mode (all threads are worker threads). 31 EM_SPMD, 32 /// Non-SPMD execution mode (1 master thread, others are workers). 33 EM_NonSPMD, 34 /// Unknown execution mode (orphaned directive). 35 EM_Unknown, 36 }; 37 private: 38 /// Parallel outlined function work for workers to execute. 39 llvm::SmallVector<llvm::Function *, 16> Work; 40 41 struct EntryFunctionState { 42 llvm::BasicBlock *ExitBB = nullptr; 43 }; 44 45 class WorkerFunctionState { 46 public: 47 llvm::Function *WorkerFn; 48 const CGFunctionInfo &CGFI; 49 SourceLocation Loc; 50 51 WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc); 52 53 private: 54 void createWorkerFunction(CodeGenModule &CGM); 55 }; 56 57 ExecutionMode getExecutionMode() const; 58 59 /// Emit the worker function for the current target region. 60 void emitWorkerFunction(WorkerFunctionState &WST); 61 62 /// Helper for worker function. Emit body of worker loop. 63 void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); 64 65 /// Helper for non-SPMD target entry function. Guide the master and 66 /// worker threads to their respective locations. 67 void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, 68 WorkerFunctionState &WST); 69 70 /// Signal termination of OMP execution for non-SPMD target entry 71 /// function. 72 void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); 73 74 /// Helper for generic variables globalization prolog. 75 void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc); 76 77 /// Helper for generic variables globalization epilog. 78 void emitGenericVarsEpilog(CodeGenFunction &CGF); 79 80 /// Helper for SPMD mode target directive's entry function. 81 void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, 82 const OMPExecutableDirective &D); 83 84 /// Signal termination of SPMD mode execution. 85 void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); 86 87 // 88 // Base class overrides. 89 // 90 91 /// Creates offloading entry for the provided entry ID \a ID, 92 /// address \a Addr, size \a Size, and flags \a Flags. 93 void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, 94 uint64_t Size, int32_t Flags, 95 llvm::GlobalValue::LinkageTypes Linkage) override; 96 97 /// Emit outlined function specialized for the Fork-Join 98 /// programming model for applicable target directives on the NVPTX device. 99 /// \param D Directive to emit. 100 /// \param ParentName Name of the function that encloses the target region. 101 /// \param OutlinedFn Outlined function value to be defined by this call. 102 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 103 /// \param IsOffloadEntry True if the outlined function is an offload entry. 104 /// An outlined function may not be an entry if, e.g. the if clause always 105 /// evaluates to false. 106 void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, 107 llvm::Function *&OutlinedFn, 108 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, 109 const RegionCodeGenTy &CodeGen); 110 111 /// Emit outlined function specialized for the Single Program 112 /// Multiple Data programming model for applicable target directives on the 113 /// NVPTX device. 114 /// \param D Directive to emit. 115 /// \param ParentName Name of the function that encloses the target region. 116 /// \param OutlinedFn Outlined function value to be defined by this call. 117 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 118 /// \param IsOffloadEntry True if the outlined function is an offload entry. 119 /// \param CodeGen Object containing the target statements. 120 /// An outlined function may not be an entry if, e.g. the if clause always 121 /// evaluates to false. 122 void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, 123 llvm::Function *&OutlinedFn, 124 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, 125 const RegionCodeGenTy &CodeGen); 126 127 /// Emit outlined function for 'target' directive on the NVPTX 128 /// device. 129 /// \param D Directive to emit. 130 /// \param ParentName Name of the function that encloses the target region. 131 /// \param OutlinedFn Outlined function value to be defined by this call. 132 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 133 /// \param IsOffloadEntry True if the outlined function is an offload entry. 134 /// An outlined function may not be an entry if, e.g. the if clause always 135 /// evaluates to false. 136 void emitTargetOutlinedFunction(const OMPExecutableDirective &D, 137 StringRef ParentName, 138 llvm::Function *&OutlinedFn, 139 llvm::Constant *&OutlinedFnID, 140 bool IsOffloadEntry, 141 const RegionCodeGenTy &CodeGen) override; 142 143 /// Emits code for parallel or serial call of the \a OutlinedFn with 144 /// variables captured in a record which address is stored in \a 145 /// CapturedStruct. 146 /// This call is for the Non-SPMD Execution Mode. 147 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 148 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 149 /// \param CapturedVars A pointer to the record with the references to 150 /// variables used in \a OutlinedFn function. 151 /// \param IfCond Condition in the associated 'if' clause, if it was 152 /// specified, nullptr otherwise. 153 void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 154 llvm::Value *OutlinedFn, 155 ArrayRef<llvm::Value *> CapturedVars, 156 const Expr *IfCond); 157 158 /// Emits code for parallel or serial call of the \a OutlinedFn with 159 /// variables captured in a record which address is stored in \a 160 /// CapturedStruct. 161 /// This call is for a parallel directive within an SPMD target directive. 162 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 163 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 164 /// \param CapturedVars A pointer to the record with the references to 165 /// variables used in \a OutlinedFn function. 166 /// \param IfCond Condition in the associated 'if' clause, if it was 167 /// specified, nullptr otherwise. 168 /// 169 void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 170 llvm::Value *OutlinedFn, 171 ArrayRef<llvm::Value *> CapturedVars, 172 const Expr *IfCond); 173 174 protected: 175 /// Get the function name of an outlined region. 176 // The name can be customized depending on the target. 177 // getOutlinedHelperName()178 StringRef getOutlinedHelperName() const override { 179 return "__omp_outlined__"; 180 } 181 182 public: 183 explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); 184 185 /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 186 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. 187 virtual void emitProcBindClause(CodeGenFunction &CGF, 188 OpenMPProcBindClauseKind ProcBind, 189 SourceLocation Loc) override; 190 191 /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 192 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads' 193 /// clause. 194 /// \param NumThreads An integer value of threads. 195 virtual void emitNumThreadsClause(CodeGenFunction &CGF, 196 llvm::Value *NumThreads, 197 SourceLocation Loc) override; 198 199 /// This function ought to emit, in the general case, a call to 200 // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed 201 // as these numbers are obtained through the PTX grid and block configuration. 202 /// \param NumTeams An integer expression of teams. 203 /// \param ThreadLimit An integer expression of threads. 204 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, 205 const Expr *ThreadLimit, SourceLocation Loc) override; 206 207 /// Emits inlined function for the specified OpenMP parallel 208 // directive. 209 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 210 /// kmp_int32 BoundID, struct context_vars*). 211 /// \param D OpenMP directive. 212 /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 213 /// \param InnermostKind Kind of innermost directive (for simple directives it 214 /// is a directive itself, for combined - its innermost directive). 215 /// \param CodeGen Code generation sequence for the \a D directive. 216 llvm::Value * 217 emitParallelOutlinedFunction(const OMPExecutableDirective &D, 218 const VarDecl *ThreadIDVar, 219 OpenMPDirectiveKind InnermostKind, 220 const RegionCodeGenTy &CodeGen) override; 221 222 /// Emits inlined function for the specified OpenMP teams 223 // directive. 224 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 225 /// kmp_int32 BoundID, struct context_vars*). 226 /// \param D OpenMP directive. 227 /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 228 /// \param InnermostKind Kind of innermost directive (for simple directives it 229 /// is a directive itself, for combined - its innermost directive). 230 /// \param CodeGen Code generation sequence for the \a D directive. 231 llvm::Value * 232 emitTeamsOutlinedFunction(const OMPExecutableDirective &D, 233 const VarDecl *ThreadIDVar, 234 OpenMPDirectiveKind InnermostKind, 235 const RegionCodeGenTy &CodeGen) override; 236 237 /// Emits code for teams call of the \a OutlinedFn with 238 /// variables captured in a record which address is stored in \a 239 /// CapturedStruct. 240 /// \param OutlinedFn Outlined function to be run by team masters. Type of 241 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 242 /// \param CapturedVars A pointer to the record with the references to 243 /// variables used in \a OutlinedFn function. 244 /// 245 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, 246 SourceLocation Loc, llvm::Value *OutlinedFn, 247 ArrayRef<llvm::Value *> CapturedVars) override; 248 249 /// Emits code for parallel or serial call of the \a OutlinedFn with 250 /// variables captured in a record which address is stored in \a 251 /// CapturedStruct. 252 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of 253 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 254 /// \param CapturedVars A pointer to the record with the references to 255 /// variables used in \a OutlinedFn function. 256 /// \param IfCond Condition in the associated 'if' clause, if it was 257 /// specified, nullptr otherwise. 258 void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 259 llvm::Value *OutlinedFn, 260 ArrayRef<llvm::Value *> CapturedVars, 261 const Expr *IfCond) override; 262 263 /// Emits a critical region. 264 /// \param CriticalName Name of the critical region. 265 /// \param CriticalOpGen Generator for the statement associated with the given 266 /// critical region. 267 /// \param Hint Value of the 'hint' clause (optional). 268 void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, 269 const RegionCodeGenTy &CriticalOpGen, 270 SourceLocation Loc, 271 const Expr *Hint = nullptr) override; 272 273 /// Emit a code for reduction clause. 274 /// 275 /// \param Privates List of private copies for original reduction arguments. 276 /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. 277 /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. 278 /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' 279 /// or 'operator binop(LHS, RHS)'. 280 /// \param Options List of options for reduction codegen: 281 /// WithNowait true if parent directive has also nowait clause, false 282 /// otherwise. 283 /// SimpleReduction Emit reduction operation only. Used for omp simd 284 /// directive on the host. 285 /// ReductionKind The kind of reduction to perform. 286 virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 287 ArrayRef<const Expr *> Privates, 288 ArrayRef<const Expr *> LHSExprs, 289 ArrayRef<const Expr *> RHSExprs, 290 ArrayRef<const Expr *> ReductionOps, 291 ReductionOptionsTy Options) override; 292 293 /// Returns specified OpenMP runtime function for the current OpenMP 294 /// implementation. Specialized for the NVPTX device. 295 /// \param Function OpenMP runtime function. 296 /// \return Specified function. 297 llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); 298 299 /// Translates the native parameter of outlined function if this is required 300 /// for target. 301 /// \param FD Field decl from captured record for the parameter. 302 /// \param NativeParam Parameter itself. 303 const VarDecl *translateParameter(const FieldDecl *FD, 304 const VarDecl *NativeParam) const override; 305 306 /// Gets the address of the native argument basing on the address of the 307 /// target-specific parameter. 308 /// \param NativeParam Parameter itself. 309 /// \param TargetParam Corresponding target-specific parameter. 310 Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, 311 const VarDecl *TargetParam) const override; 312 313 /// Emits call of the outlined function with the provided arguments, 314 /// translating these arguments to correct target-specific arguments. 315 void emitOutlinedFunctionCall( 316 CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, 317 ArrayRef<llvm::Value *> Args = llvm::None) const override; 318 319 /// Emits OpenMP-specific function prolog. 320 /// Required for device constructs. 321 void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override; 322 323 /// Gets the OpenMP-specific address of the local variable. 324 Address getAddressOfLocalVariable(CodeGenFunction &CGF, 325 const VarDecl *VD) override; 326 327 /// Target codegen is specialized based on two data-sharing modes: CUDA, in 328 /// which the local variables are actually global threadlocal, and Generic, in 329 /// which the local variables are placed in global memory if they may escape 330 /// their declaration context. 331 enum DataSharingMode { 332 /// CUDA data sharing mode. 333 CUDA, 334 /// Generic data-sharing mode. 335 Generic, 336 }; 337 338 /// Cleans up references to the objects in finished function. 339 /// 340 void functionFinished(CodeGenFunction &CGF) override; 341 342 private: 343 /// Track the execution mode when codegening directives within a target 344 /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the 345 /// target region and used by containing directives such as 'parallel' 346 /// to emit optimized code. 347 ExecutionMode CurrentExecutionMode = EM_Unknown; 348 349 /// true if we're emitting the code for the target region and next parallel 350 /// region is L0 for sure. 351 bool IsInTargetMasterThreadRegion = false; 352 /// true if we're definitely in the parallel region. 353 bool IsInParallelRegion = false; 354 355 /// Map between an outlined function and its wrapper. 356 llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap; 357 358 /// Emit function which wraps the outline parallel region 359 /// and controls the parameters which are passed to this function. 360 /// The wrapper ensures that the outlined function is called 361 /// with the correct arguments when data is shared. 362 llvm::Function *createParallelDataSharingWrapper( 363 llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D); 364 365 /// The map of local variables to their addresses in the global memory. 366 using DeclToAddrMapTy = llvm::MapVector<const Decl *, 367 std::pair<const FieldDecl *, Address>>; 368 /// Set of the parameters passed by value escaping OpenMP context. 369 using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; 370 struct FunctionData { 371 DeclToAddrMapTy LocalVarData; 372 EscapedParamsTy EscapedParameters; 373 llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; 374 llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs; 375 const RecordDecl *GlobalRecord = nullptr; 376 llvm::Value *GlobalRecordAddr = nullptr; 377 std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; 378 }; 379 /// Maps the function to the list of the globalized variables with their 380 /// addresses. 381 llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls; 382 }; 383 384 } // CodeGen namespace. 385 } // clang namespace. 386 387 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 388