1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/StmtOpenMP.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/IR/CallSite.h"
20 #include "llvm/IR/DerivedTypes.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Value.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <cassert>
25 
26 using namespace clang;
27 using namespace CodeGen;
28 
29 namespace {
30 /// \brief API for captured statement code generation in OpenMP constructs.
31 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
32 public:
CGOpenMPRegionInfo(const OMPExecutableDirective & D,const CapturedStmt & CS,const VarDecl * ThreadIDVar)33   CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
34                      const VarDecl *ThreadIDVar)
35       : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
36         Directive(D) {
37     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
38   }
39 
40   /// \brief Gets a variable or parameter for storing global thread id
41   /// inside OpenMP construct.
getThreadIDVariable() const42   const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
43 
44   /// \brief Gets an LValue for the current ThreadID variable.
45   LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
46 
classof(const CGCapturedStmtInfo * Info)47   static bool classof(const CGCapturedStmtInfo *Info) {
48     return Info->getKind() == CR_OpenMP;
49   }
50 
51   /// \brief Emit the captured statement body.
52   void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
53 
54   /// \brief Get the name of the capture helper.
getHelperName() const55   StringRef getHelperName() const override { return ".omp_outlined."; }
56 
57 private:
58   /// \brief A variable or parameter storing global thread id for OpenMP
59   /// constructs.
60   const VarDecl *ThreadIDVar;
61   /// \brief OpenMP executable directive associated with the region.
62   const OMPExecutableDirective &Directive;
63 };
64 } // namespace
65 
getThreadIDVariableLValue(CodeGenFunction & CGF)66 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
67   return CGF.MakeNaturalAlignAddrLValue(
68       CGF.GetAddrOfLocalVar(ThreadIDVar),
69       CGF.getContext().getPointerType(ThreadIDVar->getType()));
70 }
71 
EmitBody(CodeGenFunction & CGF,Stmt * S)72 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
73   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
74   CGF.EmitOMPPrivateClause(Directive, PrivateScope);
75   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
76   if (PrivateScope.Privatize())
77     // Emit implicit barrier to synchronize threads and avoid data races.
78     CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(),
79                                                   /*IsExplicit=*/false);
80   CGCapturedStmtInfo::EmitBody(CGF, S);
81 }
82 
CGOpenMPRuntime(CodeGenModule & CGM)83 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
84     : CGM(CGM), DefaultOpenMPPSource(nullptr) {
85   IdentTy = llvm::StructType::create(
86       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
87       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
88       CGM.Int8PtrTy /* psource */, nullptr);
89   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
90   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
91                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
92   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
93   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
94 }
95 
96 llvm::Value *
EmitOpenMPOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar)97 CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
98                                             const VarDecl *ThreadIDVar) {
99   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
100   CodeGenFunction CGF(CGM, true);
101   CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
102   CGF.CapturedStmtInfo = &CGInfo;
103   return CGF.GenerateCapturedStmtFunction(*CS);
104 }
105 
106 llvm::Value *
GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags)107 CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) {
108   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
109   if (!Entry) {
110     if (!DefaultOpenMPPSource) {
111       // Initialize default location for psource field of ident_t structure of
112       // all ident_t objects. Format is ";file;function;line;column;;".
113       // Taken from
114       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
115       DefaultOpenMPPSource =
116           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
117       DefaultOpenMPPSource =
118           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
119     }
120     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
121         CGM.getModule(), IdentTy, /*isConstant*/ true,
122         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
123     DefaultOpenMPLocation->setUnnamedAddr(true);
124 
125     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
126     llvm::Constant *Values[] = {Zero,
127                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
128                                 Zero, Zero, DefaultOpenMPPSource};
129     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
130     DefaultOpenMPLocation->setInitializer(Init);
131     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
132     return DefaultOpenMPLocation;
133   }
134   return Entry;
135 }
136 
EmitOpenMPUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,OpenMPLocationFlags Flags)137 llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
138     CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags) {
139   // If no debug info is generated - return global default location.
140   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
141       Loc.isInvalid())
142     return GetOrCreateDefaultOpenMPLocation(Flags);
143 
144   assert(CGF.CurFn && "No function in current CodeGenFunction.");
145 
146   llvm::Value *LocValue = nullptr;
147   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
148   if (I != OpenMPLocThreadIDMap.end())
149     LocValue = I->second.DebugLoc;
150   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
151   // GetOpenMPThreadID was called before this routine.
152   if (LocValue == nullptr) {
153     // Generate "ident_t .kmpc_loc.addr;"
154     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
155     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
156     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
157     Elem.second.DebugLoc = AI;
158     LocValue = AI;
159 
160     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
161     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
162     CGF.Builder.CreateMemCpy(LocValue, GetOrCreateDefaultOpenMPLocation(Flags),
163                              llvm::ConstantExpr::getSizeOf(IdentTy),
164                              CGM.PointerAlignInBytes);
165   }
166 
167   // char **psource = &.kmpc_loc_<flags>.addr.psource;
168   auto *PSource =
169       CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);
170 
171   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
172   if (OMPDebugLoc == nullptr) {
173     SmallString<128> Buffer2;
174     llvm::raw_svector_ostream OS2(Buffer2);
175     // Build debug location
176     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
177     OS2 << ";" << PLoc.getFilename() << ";";
178     if (const FunctionDecl *FD =
179             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
180       OS2 << FD->getQualifiedNameAsString();
181     }
182     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
183     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
184     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
185   }
186   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
187   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
188 
189   return LocValue;
190 }
191 
GetOpenMPThreadID(CodeGenFunction & CGF,SourceLocation Loc)192 llvm::Value *CGOpenMPRuntime::GetOpenMPThreadID(CodeGenFunction &CGF,
193                                                 SourceLocation Loc) {
194   assert(CGF.CurFn && "No function in current CodeGenFunction.");
195 
196   llvm::Value *ThreadID = nullptr;
197   // Check whether we've already cached a load of the thread id in this
198   // function.
199   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
200   if (I != OpenMPLocThreadIDMap.end()) {
201     ThreadID = I->second.ThreadID;
202     if (ThreadID != nullptr)
203       return ThreadID;
204   }
205   if (auto OMPRegionInfo =
206           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
207     // Check if this an outlined function with thread id passed as argument.
208     auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
209     auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
210     auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
211     LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
212                                           ThreadIDVar->getType());
213     ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
214     // If value loaded in entry block, cache it and use it everywhere in
215     // function.
216     if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
217       auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
218       Elem.second.ThreadID = ThreadID;
219     }
220   } else {
221     // This is not an outlined function region - need to call __kmpc_int32
222     // kmpc_global_thread_num(ident_t *loc).
223     // Generate thread id value and cache this value for use across the
224     // function.
225     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
226     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
227     llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
228     ThreadID = CGF.EmitRuntimeCall(
229         CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
230     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
231     Elem.second.ThreadID = ThreadID;
232   }
233   return ThreadID;
234 }
235 
FunctionFinished(CodeGenFunction & CGF)236 void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) {
237   assert(CGF.CurFn && "No function in current CodeGenFunction.");
238   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
239     OpenMPLocThreadIDMap.erase(CGF.CurFn);
240 }
241 
getIdentTyPointerTy()242 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
243   return llvm::PointerType::getUnqual(IdentTy);
244 }
245 
getKmpc_MicroPointerTy()246 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
247   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
248 }
249 
250 llvm::Constant *
CreateRuntimeFunction(OpenMPRTLFunction Function)251 CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) {
252   llvm::Constant *RTLFn = nullptr;
253   switch (Function) {
254   case OMPRTL__kmpc_fork_call: {
255     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
256     // microtask, ...);
257     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
258                                 getKmpc_MicroPointerTy()};
259     llvm::FunctionType *FnTy =
260         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
261     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
262     break;
263   }
264   case OMPRTL__kmpc_global_thread_num: {
265     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
266     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
267     llvm::FunctionType *FnTy =
268         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
269     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
270     break;
271   }
272   case OMPRTL__kmpc_threadprivate_cached: {
273     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
274     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
275     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
276                                 CGM.VoidPtrTy, CGM.SizeTy,
277                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
278     llvm::FunctionType *FnTy =
279         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
280     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
281     break;
282   }
283   case OMPRTL__kmpc_critical: {
284     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
285     // kmp_critical_name *crit);
286     llvm::Type *TypeParams[] = {
287         getIdentTyPointerTy(), CGM.Int32Ty,
288         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
289     llvm::FunctionType *FnTy =
290         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
291     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
292     break;
293   }
294   case OMPRTL__kmpc_threadprivate_register: {
295     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
296     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
297     // typedef void *(*kmpc_ctor)(void *);
298     auto KmpcCtorTy =
299         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
300                                 /*isVarArg*/ false)->getPointerTo();
301     // typedef void *(*kmpc_cctor)(void *, void *);
302     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
303     auto KmpcCopyCtorTy =
304         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
305                                 /*isVarArg*/ false)->getPointerTo();
306     // typedef void (*kmpc_dtor)(void *);
307     auto KmpcDtorTy =
308         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
309             ->getPointerTo();
310     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
311                               KmpcCopyCtorTy, KmpcDtorTy};
312     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
313                                         /*isVarArg*/ false);
314     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
315     break;
316   }
317   case OMPRTL__kmpc_end_critical: {
318     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
319     // kmp_critical_name *crit);
320     llvm::Type *TypeParams[] = {
321         getIdentTyPointerTy(), CGM.Int32Ty,
322         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
323     llvm::FunctionType *FnTy =
324         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
325     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
326     break;
327   }
328   case OMPRTL__kmpc_cancel_barrier: {
329     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
330     // global_tid);
331     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
332     llvm::FunctionType *FnTy =
333         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
334     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
335     break;
336   }
337   // Build __kmpc_for_static_init*(
338   //               ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
339   //               kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
340   //               kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
341   //               kmp_int[32|64] incr, kmp_int[32|64] chunk);
342   case OMPRTL__kmpc_for_static_init_4: {
343     auto ITy = CGM.Int32Ty;
344     auto PtrTy = llvm::PointerType::getUnqual(ITy);
345     llvm::Type *TypeParams[] = {
346         getIdentTyPointerTy(),                     // loc
347         CGM.Int32Ty,                               // tid
348         CGM.Int32Ty,                               // schedtype
349         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
350         PtrTy,                                     // p_lower
351         PtrTy,                                     // p_upper
352         PtrTy,                                     // p_stride
353         ITy,                                       // incr
354         ITy                                        // chunk
355     };
356     llvm::FunctionType *FnTy =
357         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
358     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
359     break;
360   }
361   case OMPRTL__kmpc_for_static_init_4u: {
362     auto ITy = CGM.Int32Ty;
363     auto PtrTy = llvm::PointerType::getUnqual(ITy);
364     llvm::Type *TypeParams[] = {
365         getIdentTyPointerTy(),                     // loc
366         CGM.Int32Ty,                               // tid
367         CGM.Int32Ty,                               // schedtype
368         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
369         PtrTy,                                     // p_lower
370         PtrTy,                                     // p_upper
371         PtrTy,                                     // p_stride
372         ITy,                                       // incr
373         ITy                                        // chunk
374     };
375     llvm::FunctionType *FnTy =
376         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
377     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
378     break;
379   }
380   case OMPRTL__kmpc_for_static_init_8: {
381     auto ITy = CGM.Int64Ty;
382     auto PtrTy = llvm::PointerType::getUnqual(ITy);
383     llvm::Type *TypeParams[] = {
384         getIdentTyPointerTy(),                     // loc
385         CGM.Int32Ty,                               // tid
386         CGM.Int32Ty,                               // schedtype
387         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
388         PtrTy,                                     // p_lower
389         PtrTy,                                     // p_upper
390         PtrTy,                                     // p_stride
391         ITy,                                       // incr
392         ITy                                        // chunk
393     };
394     llvm::FunctionType *FnTy =
395         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
396     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
397     break;
398   }
399   case OMPRTL__kmpc_for_static_init_8u: {
400     auto ITy = CGM.Int64Ty;
401     auto PtrTy = llvm::PointerType::getUnqual(ITy);
402     llvm::Type *TypeParams[] = {
403         getIdentTyPointerTy(),                     // loc
404         CGM.Int32Ty,                               // tid
405         CGM.Int32Ty,                               // schedtype
406         llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
407         PtrTy,                                     // p_lower
408         PtrTy,                                     // p_upper
409         PtrTy,                                     // p_stride
410         ITy,                                       // incr
411         ITy                                        // chunk
412     };
413     llvm::FunctionType *FnTy =
414         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
415     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
416     break;
417   }
418   case OMPRTL__kmpc_for_static_fini: {
419     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
420     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
421     llvm::FunctionType *FnTy =
422         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
423     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
424     break;
425   }
426   case OMPRTL__kmpc_push_num_threads: {
427     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
428     // kmp_int32 num_threads)
429     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
430                                 CGM.Int32Ty};
431     llvm::FunctionType *FnTy =
432         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
433     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
434     break;
435   }
436   case OMPRTL__kmpc_serialized_parallel: {
437     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
438     // global_tid);
439     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
440     llvm::FunctionType *FnTy =
441         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
442     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
443     break;
444   }
445   case OMPRTL__kmpc_end_serialized_parallel: {
446     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
447     // global_tid);
448     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
449     llvm::FunctionType *FnTy =
450         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
451     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
452     break;
453   }
454   case OMPRTL__kmpc_flush: {
455     // Build void __kmpc_flush(ident_t *loc, ...);
456     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
457     llvm::FunctionType *FnTy =
458         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
459     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
460     break;
461   }
462   case OMPRTL__kmpc_master: {
463     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
464     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
465     llvm::FunctionType *FnTy =
466         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
467     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
468     break;
469   }
470   case OMPRTL__kmpc_end_master: {
471     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
472     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
473     llvm::FunctionType *FnTy =
474         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
475     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
476     break;
477   }
478   }
479   return RTLFn;
480 }
481 
482 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)483 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
484   // Lookup the entry, lazily creating it if necessary.
485   return GetOrCreateInternalVariable(CGM.Int8PtrPtrTy,
486                                      Twine(CGM.getMangledName(VD)) + ".cache.");
487 }
488 
getOMPAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,llvm::Value * VDAddr,SourceLocation Loc)489 llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF,
490                                                         const VarDecl *VD,
491                                                         llvm::Value *VDAddr,
492                                                         SourceLocation Loc) {
493   auto VarTy = VDAddr->getType()->getPointerElementType();
494   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
495                          GetOpenMPThreadID(CGF, Loc),
496                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
497                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
498                          getOrCreateThreadPrivateCache(VD)};
499   return CGF.EmitRuntimeCall(
500       CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
501 }
502 
EmitOMPThreadPrivateVarInit(CodeGenFunction & CGF,llvm::Value * VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)503 void CGOpenMPRuntime::EmitOMPThreadPrivateVarInit(
504     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
505     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
506   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
507   // library.
508   auto OMPLoc = EmitOpenMPUpdateLocation(CGF, Loc);
509   CGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num),
510                       OMPLoc);
511   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
512   // to register constructor/destructor for variable.
513   llvm::Value *Args[] = {OMPLoc,
514                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
515                          Ctor, CopyCtor, Dtor};
516   CGF.EmitRuntimeCall(
517       CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
518 }
519 
EmitOMPThreadPrivateVarDefinition(const VarDecl * VD,llvm::Value * VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)520 llvm::Function *CGOpenMPRuntime::EmitOMPThreadPrivateVarDefinition(
521     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
522     bool PerformInit, CodeGenFunction *CGF) {
523   VD = VD->getDefinition(CGM.getContext());
524   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
525     ThreadPrivateWithDefinition.insert(VD);
526     QualType ASTTy = VD->getType();
527 
528     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
529     auto Init = VD->getAnyInitializer();
530     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
531       // Generate function that re-emits the declaration's initializer into the
532       // threadprivate copy of the variable VD
533       CodeGenFunction CtorCGF(CGM);
534       FunctionArgList Args;
535       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
536                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
537       Args.push_back(&Dst);
538 
539       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
540           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
541           /*isVariadic=*/false);
542       auto FTy = CGM.getTypes().GetFunctionType(FI);
543       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
544           FTy, ".__kmpc_global_ctor_.", Loc);
545       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
546                             Args, SourceLocation());
547       auto ArgVal = CtorCGF.EmitLoadOfScalar(
548           CtorCGF.GetAddrOfLocalVar(&Dst),
549           /*Volatile=*/false, CGM.PointerAlignInBytes,
550           CGM.getContext().VoidPtrTy, Dst.getLocation());
551       auto Arg = CtorCGF.Builder.CreatePointerCast(
552           ArgVal,
553           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
554       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
555                                /*IsInitializer=*/true);
556       ArgVal = CtorCGF.EmitLoadOfScalar(
557           CtorCGF.GetAddrOfLocalVar(&Dst),
558           /*Volatile=*/false, CGM.PointerAlignInBytes,
559           CGM.getContext().VoidPtrTy, Dst.getLocation());
560       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
561       CtorCGF.FinishFunction();
562       Ctor = Fn;
563     }
564     if (VD->getType().isDestructedType() != QualType::DK_none) {
565       // Generate function that emits destructor call for the threadprivate copy
566       // of the variable VD
567       CodeGenFunction DtorCGF(CGM);
568       FunctionArgList Args;
569       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
570                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
571       Args.push_back(&Dst);
572 
573       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
574           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
575           /*isVariadic=*/false);
576       auto FTy = CGM.getTypes().GetFunctionType(FI);
577       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
578           FTy, ".__kmpc_global_dtor_.", Loc);
579       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
580                             SourceLocation());
581       auto ArgVal = DtorCGF.EmitLoadOfScalar(
582           DtorCGF.GetAddrOfLocalVar(&Dst),
583           /*Volatile=*/false, CGM.PointerAlignInBytes,
584           CGM.getContext().VoidPtrTy, Dst.getLocation());
585       DtorCGF.emitDestroy(ArgVal, ASTTy,
586                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
587                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
588       DtorCGF.FinishFunction();
589       Dtor = Fn;
590     }
591     // Do not emit init function if it is not required.
592     if (!Ctor && !Dtor)
593       return nullptr;
594 
595     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
596     auto CopyCtorTy =
597         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
598                                 /*isVarArg=*/false)->getPointerTo();
599     // Copying constructor for the threadprivate variable.
600     // Must be NULL - reserved by runtime, but currently it requires that this
601     // parameter is always NULL. Otherwise it fires assertion.
602     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
603     if (Ctor == nullptr) {
604       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
605                                             /*isVarArg=*/false)->getPointerTo();
606       Ctor = llvm::Constant::getNullValue(CtorTy);
607     }
608     if (Dtor == nullptr) {
609       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
610                                             /*isVarArg=*/false)->getPointerTo();
611       Dtor = llvm::Constant::getNullValue(DtorTy);
612     }
613     if (!CGF) {
614       auto InitFunctionTy =
615           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
616       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
617           InitFunctionTy, ".__omp_threadprivate_init_.");
618       CodeGenFunction InitCGF(CGM);
619       FunctionArgList ArgList;
620       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
621                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
622                             Loc);
623       EmitOMPThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
624       InitCGF.FinishFunction();
625       return InitFunction;
626     }
627     EmitOMPThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
628   }
629   return nullptr;
630 }
631 
EmitOMPParallelCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * OutlinedFn,llvm::Value * CapturedStruct)632 void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF,
633                                           SourceLocation Loc,
634                                           llvm::Value *OutlinedFn,
635                                           llvm::Value *CapturedStruct) {
636   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
637   llvm::Value *Args[] = {
638       EmitOpenMPUpdateLocation(CGF, Loc),
639       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
640       // (there is only one additional argument - 'context')
641       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
642       CGF.EmitCastToVoidPtr(CapturedStruct)};
643   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_fork_call);
644   CGF.EmitRuntimeCall(RTLFn, Args);
645 }
646 
EmitOMPSerialCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * OutlinedFn,llvm::Value * CapturedStruct)647 void CGOpenMPRuntime::EmitOMPSerialCall(CodeGenFunction &CGF,
648                                         SourceLocation Loc,
649                                         llvm::Value *OutlinedFn,
650                                         llvm::Value *CapturedStruct) {
651   auto ThreadID = GetOpenMPThreadID(CGF, Loc);
652   // Build calls:
653   // __kmpc_serialized_parallel(&Loc, GTid);
654   llvm::Value *SerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID};
655   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_serialized_parallel);
656   CGF.EmitRuntimeCall(RTLFn, SerArgs);
657 
658   // OutlinedFn(&GTid, &zero, CapturedStruct);
659   auto ThreadIDAddr = EmitThreadIDAddress(CGF, Loc);
660   auto Int32Ty =
661       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
662   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
663   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
664   llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
665   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
666 
667   // __kmpc_end_serialized_parallel(&Loc, GTid);
668   llvm::Value *EndSerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID};
669   RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel);
670   CGF.EmitRuntimeCall(RTLFn, EndSerArgs);
671 }
672 
673 // If we're inside an (outlined) parallel region, use the region info's
674 // thread-ID variable (it is passed in a first argument of the outlined function
675 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
676 // regular serial code region, get thread ID by calling kmp_int32
677 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
678 // return the address of that temp.
EmitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)679 llvm::Value *CGOpenMPRuntime::EmitThreadIDAddress(CodeGenFunction &CGF,
680                                                   SourceLocation Loc) {
681   if (auto OMPRegionInfo =
682           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
683     return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
684                                 SourceLocation()).getScalarVal();
685   auto ThreadID = GetOpenMPThreadID(CGF, Loc);
686   auto Int32Ty =
687       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
688   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
689   CGF.EmitStoreOfScalar(ThreadID,
690                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
691 
692   return ThreadIDTemp;
693 }
694 
695 llvm::Constant *
GetOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name)696 CGOpenMPRuntime::GetOrCreateInternalVariable(llvm::Type *Ty,
697                                              const llvm::Twine &Name) {
698   SmallString<256> Buffer;
699   llvm::raw_svector_ostream Out(Buffer);
700   Out << Name;
701   auto RuntimeName = Out.str();
702   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
703   if (Elem.second) {
704     assert(Elem.second->getType()->getPointerElementType() == Ty &&
705            "OMP internal variable has different type than requested");
706     return &*Elem.second;
707   }
708 
709   return Elem.second = new llvm::GlobalVariable(
710              CGM.getModule(), Ty, /*IsConstant*/ false,
711              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
712              Elem.first());
713 }
714 
GetCriticalRegionLock(StringRef CriticalName)715 llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) {
716   llvm::Twine Name(".gomp_critical_user_", CriticalName);
717   return GetOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
718 }
719 
EmitOMPCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const std::function<void ()> & CriticalOpGen,SourceLocation Loc)720 void CGOpenMPRuntime::EmitOMPCriticalRegion(
721     CodeGenFunction &CGF, StringRef CriticalName,
722     const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
723   auto RegionLock = GetCriticalRegionLock(CriticalName);
724   // __kmpc_critical(ident_t *, gtid, Lock);
725   // CriticalOpGen();
726   // __kmpc_end_critical(ident_t *, gtid, Lock);
727   // Prepare arguments and build a call to __kmpc_critical
728   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
729                          GetOpenMPThreadID(CGF, Loc), RegionLock};
730   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_critical);
731   CGF.EmitRuntimeCall(RTLFn, Args);
732   CriticalOpGen();
733   // Build a call to __kmpc_end_critical
734   RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_critical);
735   CGF.EmitRuntimeCall(RTLFn, Args);
736 }
737 
EmitOMPIfStmt(CodeGenFunction & CGF,llvm::Value * IfCond,const std::function<void ()> & BodyOpGen)738 static void EmitOMPIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
739                           const std::function<void()> &BodyOpGen) {
740   llvm::Value *CallBool = CGF.EmitScalarConversion(
741       IfCond,
742       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
743       CGF.getContext().BoolTy);
744 
745   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
746   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
747   // Generate the branch (If-stmt)
748   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
749   CGF.EmitBlock(ThenBlock);
750   BodyOpGen();
751   // Emit the rest of bblocks/branches
752   CGF.EmitBranch(ContBlock);
753   CGF.EmitBlock(ContBlock, true);
754 }
755 
EmitOMPMasterRegion(CodeGenFunction & CGF,const std::function<void ()> & MasterOpGen,SourceLocation Loc)756 void CGOpenMPRuntime::EmitOMPMasterRegion(
757     CodeGenFunction &CGF, const std::function<void()> &MasterOpGen,
758     SourceLocation Loc) {
759   // if(__kmpc_master(ident_t *, gtid)) {
760   //   MasterOpGen();
761   //   __kmpc_end_master(ident_t *, gtid);
762   // }
763   // Prepare arguments and build a call to __kmpc_master
764   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
765                          GetOpenMPThreadID(CGF, Loc)};
766   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_master);
767   auto *IsMaster = CGF.EmitRuntimeCall(RTLFn, Args);
768   EmitOMPIfStmt(CGF, IsMaster, [&]() -> void {
769     MasterOpGen();
770     // Build a call to __kmpc_end_master.
771     // OpenMP [1.2.2 OpenMP Language Terminology]
772     // For C/C++, an executable statement, possibly compound, with a single
773     // entry at the top and a single exit at the bottom, or an OpenMP construct.
774     // * Access to the structured block must not be the result of a branch.
775     // * The point of exit cannot be a branch out of the structured block.
776     // * The point of entry must not be a call to setjmp().
777     // * longjmp() and throw() must not violate the entry/exit criteria.
778     // * An expression statement, iteration statement, selection statement, or
779     // try block is considered to be a structured block if the corresponding
780     // compound statement obtained by enclosing it in { and } would be a
781     // structured block.
782     // It is analyzed in Sema, so we can just call __kmpc_end_master() on
783     // fallthrough rather than pushing a normal cleanup for it.
784     RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_master);
785     CGF.EmitRuntimeCall(RTLFn, Args);
786   });
787 }
788 
EmitOMPBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,bool IsExplicit)789 void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF,
790                                          SourceLocation Loc, bool IsExplicit) {
791   // Build call __kmpc_cancel_barrier(loc, thread_id);
792   auto Flags = static_cast<OpenMPLocationFlags>(
793       OMP_IDENT_KMPC |
794       (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
795   // Build call __kmpc_cancel_barrier(loc, thread_id);
796   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
797   // one provides the same functionality and adds initial support for
798   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
799   // is provided default by the runtime library so it safe to make such
800   // replacement.
801   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, Flags),
802                          GetOpenMPThreadID(CGF, Loc)};
803   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_cancel_barrier);
804   CGF.EmitRuntimeCall(RTLFn, Args);
805 }
806 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The numeric value of the selected enumerator is passed to the runtime as
/// the 32-bit schedule-type argument of the loop-init call, so the values must
/// stay in sync with the runtime's definition.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Used for schedule(static) when a chunk size is specified.
  OMP_sch_static_chunked = 33,
  /// \brief Used for schedule(static) without a chunk size, and when the
  /// schedule kind is unknown.
  OMP_sch_static = 34,
  /// \brief Used for schedule(dynamic), with or without a chunk size.
  OMP_sch_dynamic_chunked = 35,
  /// \brief Used for schedule(guided), with or without a chunk size.
  OMP_sch_guided_chunked = 36,
  /// \brief Used for schedule(runtime).
  OMP_sch_runtime = 37,
  /// \brief Used for schedule(auto).
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Lower bound for 'nomerge' versions.
  OMP_nm_lower = 160,
};
823 
824 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked)825 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
826                                           bool Chunked) {
827   switch (ScheduleKind) {
828   case OMPC_SCHEDULE_static:
829     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
830   case OMPC_SCHEDULE_dynamic:
831     return OMP_sch_dynamic_chunked;
832   case OMPC_SCHEDULE_guided:
833     return OMP_sch_guided_chunked;
834   case OMPC_SCHEDULE_auto:
835     return OMP_sch_auto;
836   case OMPC_SCHEDULE_runtime:
837     return OMP_sch_runtime;
838   case OMPC_SCHEDULE_unknown:
839     assert(!Chunked && "chunk was specified but schedule kind not known");
840     return OMP_sch_static;
841   }
842   llvm_unreachable("Unexpected runtime schedule");
843 }
844 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const845 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
846                                          bool Chunked) const {
847   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
848   return Schedule == OMP_sch_static;
849 }
850 
EmitOMPForInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPScheduleClauseKind ScheduleKind,unsigned IVSize,bool IVSigned,llvm::Value * IL,llvm::Value * LB,llvm::Value * UB,llvm::Value * ST,llvm::Value * Chunk)851 void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
852                                      OpenMPScheduleClauseKind ScheduleKind,
853                                      unsigned IVSize, bool IVSigned,
854                                      llvm::Value *IL, llvm::Value *LB,
855                                      llvm::Value *UB, llvm::Value *ST,
856                                      llvm::Value *Chunk) {
857   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
858   // Call __kmpc_for_static_init(
859   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
860   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
861   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
862   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
863   // TODO: Implement dynamic schedule.
864 
865   // If the Chunk was not specified in the clause - use default value 1.
866   if (Chunk == nullptr)
867     Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
868 
869   llvm::Value *Args[] = {
870       EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
871       GetOpenMPThreadID(CGF, Loc),
872       CGF.Builder.getInt32(Schedule), // Schedule type
873       IL,                             // &isLastIter
874       LB,                             // &LB
875       UB,                             // &UB
876       ST,                             // &Stride
877       CGF.Builder.getIntN(IVSize, 1), // Incr
878       Chunk                           // Chunk
879   };
880   assert((IVSize == 32 || IVSize == 64) &&
881          "Index size is not compatible with the omp runtime");
882   auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
883                                     : OMPRTL__kmpc_for_static_init_4u)
884                         : (IVSigned ? OMPRTL__kmpc_for_static_init_8
885                                     : OMPRTL__kmpc_for_static_init_8u);
886   auto RTLFn = CreateRuntimeFunction(F);
887   CGF.EmitRuntimeCall(RTLFn, Args);
888 }
889 
EmitOMPForFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPScheduleClauseKind ScheduleKind)890 void CGOpenMPRuntime::EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc,
891                                        OpenMPScheduleClauseKind ScheduleKind) {
892   assert((ScheduleKind == OMPC_SCHEDULE_static ||
893           ScheduleKind == OMPC_SCHEDULE_unknown) &&
894          "Non-static schedule kinds are not yet implemented");
895   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
896   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
897                          GetOpenMPThreadID(CGF, Loc)};
898   auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini);
899   CGF.EmitRuntimeCall(RTLFn, Args);
900 }
901 
EmitOMPNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)902 void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
903                                               llvm::Value *NumThreads,
904                                               SourceLocation Loc) {
905   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
906   llvm::Value *Args[] = {
907       EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc),
908       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
909   llvm::Constant *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_push_num_threads);
910   CGF.EmitRuntimeCall(RTLFn, Args);
911 }
912 
EmitOMPFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc)913 void CGOpenMPRuntime::EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
914                                    SourceLocation Loc) {
915   // Build call void __kmpc_flush(ident_t *loc, ...)
916   // FIXME: List of variables is ignored by libiomp5 runtime, no need to
917   // generate it, just request full memory fence.
918   llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
919                          llvm::ConstantInt::get(CGM.Int32Ty, 0)};
920   auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush);
921   CGF.EmitRuntimeCall(RTLFn, Args);
922 }
923