//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
12 
13 // XXXAR: TODO fix default address space in this file as well
14 #define getUnqual(arg) get(arg, 0u)
15 
16 #include "CGOpenMPRuntime.h"
17 #include "CGCXXABI.h"
18 #include "CGCleanup.h"
19 #include "CGRecordLayout.h"
20 #include "CodeGenFunction.h"
21 #include "clang/AST/Attr.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/AST/OpenMPClause.h"
24 #include "clang/AST/StmtOpenMP.h"
25 #include "clang/AST/StmtVisitor.h"
26 #include "clang/Basic/BitmaskEnum.h"
27 #include "clang/Basic/FileManager.h"
28 #include "clang/Basic/OpenMPKinds.h"
29 #include "clang/Basic/SourceManager.h"
30 #include "clang/CodeGen/ConstantInitBuilder.h"
31 #include "llvm/ADT/ArrayRef.h"
32 #include "llvm/ADT/SetOperations.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/GlobalValue.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54   /// Kinds of OpenMP regions used in codegen.
55   enum CGOpenMPRegionKind {
56     /// Region with outlined function for standalone 'parallel'
57     /// directive.
58     ParallelOutlinedRegion,
59     /// Region with outlined function for standalone 'task' directive.
60     TaskOutlinedRegion,
61     /// Region for constructs that do not require function outlining,
62     /// like 'for', 'sections', 'atomic' etc. directives.
63     InlinedRegion,
64     /// Region with outlined function for standalone 'target' directive.
65     TargetRegion,
66   };
67 
CGOpenMPRegionInfo(const CapturedStmt & CS,const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)68   CGOpenMPRegionInfo(const CapturedStmt &CS,
69                      const CGOpenMPRegionKind RegionKind,
70                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71                      bool HasCancel)
72       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)75   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77                      bool HasCancel)
78       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79         Kind(Kind), HasCancel(HasCancel) {}
80 
81   /// Get a variable or parameter for storing global thread id
82   /// inside OpenMP construct.
83   virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85   /// Emit the captured statement body.
86   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88   /// Get an LValue for the current ThreadID variable.
89   /// \return LValue for thread id variable. This LValue always has type int32*.
90   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
emitUntiedSwitch(CodeGenFunction &)92   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
getRegionKind() const94   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
getDirectiveKind() const96   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
hasCancel() const98   bool hasCancel() const { return HasCancel; }
99 
classof(const CGCapturedStmtInfo * Info)100   static bool classof(const CGCapturedStmtInfo *Info) {
101     return Info->getKind() == CR_OpenMP;
102   }
103 
104   ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107   CGOpenMPRegionKind RegionKind;
108   RegionCodeGenTy CodeGen;
109   OpenMPDirectiveKind Kind;
110   bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,StringRef HelperName)116   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117                              const RegionCodeGenTy &CodeGen,
118                              OpenMPDirectiveKind Kind, bool HasCancel,
119                              StringRef HelperName)
120       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121                            HasCancel),
122         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124   }
125 
126   /// Get a variable or parameter for storing global thread id
127   /// inside OpenMP construct.
getThreadIDVariable() const128   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130   /// Get the name of the capture helper.
getHelperName() const131   StringRef getHelperName() const override { return HelperName; }
132 
classof(const CGCapturedStmtInfo * Info)133   static bool classof(const CGCapturedStmtInfo *Info) {
134     return CGOpenMPRegionInfo::classof(Info) &&
135            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136                ParallelOutlinedRegion;
137   }
138 
139 private:
140   /// A variable or parameter storing global thread id for OpenMP
141   /// constructs.
142   const VarDecl *ThreadIDVar;
143   StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149   class UntiedTaskActionTy final : public PrePostActionTy {
150     bool Untied;
151     const VarDecl *PartIDVar;
152     const RegionCodeGenTy UntiedCodeGen;
153     llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155   public:
UntiedTaskActionTy(bool Tied,const VarDecl * PartIDVar,const RegionCodeGenTy & UntiedCodeGen)156     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157                        const RegionCodeGenTy &UntiedCodeGen)
158         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
Enter(CodeGenFunction & CGF)159     void Enter(CodeGenFunction &CGF) override {
160       if (Untied) {
161         // Emit task switching point.
162         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163             CGF.GetAddrOfLocalVar(PartIDVar),
164             PartIDVar->getType()->castAs<PointerType>());
165         llvm::Value *Res =
166             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169         CGF.EmitBlock(DoneBB);
170         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173                               CGF.Builder.GetInsertBlock());
174         emitUntiedSwitch(CGF);
175       }
176     }
emitUntiedSwitch(CodeGenFunction & CGF) const177     void emitUntiedSwitch(CodeGenFunction &CGF) const {
178       if (Untied) {
179         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180             CGF.GetAddrOfLocalVar(PartIDVar),
181             PartIDVar->getType()->castAs<PointerType>());
182         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               PartIdLVal);
184         UntiedCodeGen(CGF);
185         CodeGenFunction::JumpDest CurPoint =
186             CGF.getJumpDestInCurrentScope(".untied.next.");
187         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
188         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190                               CGF.Builder.GetInsertBlock());
191         CGF.EmitBranchThroughCleanup(CurPoint);
192         CGF.EmitBlock(CurPoint.getBlock());
193       }
194     }
getNumberOfParts() const195     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196   };
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,const UntiedTaskActionTy & Action)197   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198                                  const VarDecl *ThreadIDVar,
199                                  const RegionCodeGenTy &CodeGen,
200                                  OpenMPDirectiveKind Kind, bool HasCancel,
201                                  const UntiedTaskActionTy &Action)
202       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203         ThreadIDVar(ThreadIDVar), Action(Action) {
204     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205   }
206 
207   /// Get a variable or parameter for storing global thread id
208   /// inside OpenMP construct.
getThreadIDVariable() const209   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211   /// Get an LValue for the current ThreadID variable.
212   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214   /// Get the name of the capture helper.
getHelperName() const215   StringRef getHelperName() const override { return ".omp_outlined."; }
216 
emitUntiedSwitch(CodeGenFunction & CGF)217   void emitUntiedSwitch(CodeGenFunction &CGF) override {
218     Action.emitUntiedSwitch(CGF);
219   }
220 
classof(const CGCapturedStmtInfo * Info)221   static bool classof(const CGCapturedStmtInfo *Info) {
222     return CGOpenMPRegionInfo::classof(Info) &&
223            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224                TaskOutlinedRegion;
225   }
226 
227 private:
228   /// A variable or parameter storing global thread id for OpenMP
229   /// constructs.
230   const VarDecl *ThreadIDVar;
231   /// Action for emitting code for untied tasks.
232   const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo * OldCSI,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)239   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240                             const RegionCodeGenTy &CodeGen,
241                             OpenMPDirectiveKind Kind, bool HasCancel)
242       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243         OldCSI(OldCSI),
244         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246   // Retrieve the value of the context parameter.
getContextValue() const247   llvm::Value *getContextValue() const override {
248     if (OuterRegionInfo)
249       return OuterRegionInfo->getContextValue();
250     llvm_unreachable("No context value for inlined OpenMP region");
251   }
252 
setContextValue(llvm::Value * V)253   void setContextValue(llvm::Value *V) override {
254     if (OuterRegionInfo) {
255       OuterRegionInfo->setContextValue(V);
256       return;
257     }
258     llvm_unreachable("No context value for inlined OpenMP region");
259   }
260 
261   /// Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const262   const FieldDecl *lookup(const VarDecl *VD) const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->lookup(VD);
265     // If there is no outer outlined region,no need to lookup in a list of
266     // captured variables, we can use the original one.
267     return nullptr;
268   }
269 
getThisFieldDecl() const270   FieldDecl *getThisFieldDecl() const override {
271     if (OuterRegionInfo)
272       return OuterRegionInfo->getThisFieldDecl();
273     return nullptr;
274   }
275 
276   /// Get a variable or parameter for storing global thread id
277   /// inside OpenMP construct.
getThreadIDVariable() const278   const VarDecl *getThreadIDVariable() const override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariable();
281     return nullptr;
282   }
283 
284   /// Get an LValue for the current ThreadID variable.
getThreadIDVariableLValue(CodeGenFunction & CGF)285   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286     if (OuterRegionInfo)
287       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288     llvm_unreachable("No LValue for inlined OpenMP construct");
289   }
290 
291   /// Get the name of the capture helper.
getHelperName() const292   StringRef getHelperName() const override {
293     if (auto *OuterRegionInfo = getOldCSI())
294       return OuterRegionInfo->getHelperName();
295     llvm_unreachable("No helper name for inlined OpenMP construct");
296   }
297 
emitUntiedSwitch(CodeGenFunction & CGF)298   void emitUntiedSwitch(CodeGenFunction &CGF) override {
299     if (OuterRegionInfo)
300       OuterRegionInfo->emitUntiedSwitch(CGF);
301   }
302 
getOldCSI() const303   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
classof(const CGCapturedStmtInfo * Info)305   static bool classof(const CGCapturedStmtInfo *Info) {
306     return CGOpenMPRegionInfo::classof(Info) &&
307            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308   }
309 
310   ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313   /// CodeGen info about outer OpenMP region.
314   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315   CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
CGOpenMPTargetRegionInfo(const CapturedStmt & CS,const RegionCodeGenTy & CodeGen,StringRef HelperName)325   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
327       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328                            /*HasCancel=*/false),
329         HelperName(HelperName) {}
330 
331   /// This is unused for target regions because each starts executing
332   /// with a single thread.
getThreadIDVariable() const333   const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335   /// Get the name of the capture helper.
getHelperName() const336   StringRef getHelperName() const override { return HelperName; }
337 
classof(const CGCapturedStmtInfo * Info)338   static bool classof(const CGCapturedStmtInfo *Info) {
339     return CGOpenMPRegionInfo::classof(Info) &&
340            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341   }
342 
343 private:
344   StringRef HelperName;
345 };
346 
EmptyCodeGen(CodeGenFunction &,PrePostActionTy &)347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348   llvm_unreachable("No codegen for expressions");
349 }
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
CGOpenMPInnerExprInfo(CodeGenFunction & CGF,const CapturedStmt & CS)354   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356                                   OMPD_unknown,
357                                   /*HasCancel=*/false),
358         PrivScope(CGF) {
359     // Make sure the globals captured in the provided statement are local by
360     // using the privatization logic. We assume the same variable is not
361     // captured more than once.
362     for (const auto &C : CS.captures()) {
363       if (!C.capturesVariable() && !C.capturesVariableByCopy())
364         continue;
365 
366       const VarDecl *VD = C.getCapturedVar();
367       if (VD->isLocalVarDeclOrParm())
368         continue;
369 
370       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371                       /*RefersToEnclosingVariableOrCapture=*/false,
372                       VD->getType().getNonReferenceType(), VK_LValue,
373                       C.getLocation());
374       PrivScope.addPrivate(
375           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
376     }
377     (void)PrivScope.Privatize();
378   }
379 
380   /// Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const381   const FieldDecl *lookup(const VarDecl *VD) const override {
382     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
383       return FD;
384     return nullptr;
385   }
386 
387   /// Emit the captured statement body.
EmitBody(CodeGenFunction & CGF,const Stmt * S)388   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
389     llvm_unreachable("No body for expressions");
390   }
391 
392   /// Get a variable or parameter for storing global thread id
393   /// inside OpenMP construct.
getThreadIDVariable() const394   const VarDecl *getThreadIDVariable() const override {
395     llvm_unreachable("No thread id for expressions");
396   }
397 
398   /// Get the name of the capture helper.
getHelperName() const399   StringRef getHelperName() const override {
400     llvm_unreachable("No helper name for expressions");
401   }
402 
classof(const CGCapturedStmtInfo * Info)403   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
404 
405 private:
406   /// Private scope to capture global variables.
407   CodeGenFunction::OMPPrivateScope PrivScope;
408 };
409 
410 /// RAII for emitting code of OpenMP constructs.
411 class InlinedOpenMPRegionRAII {
412   CodeGenFunction &CGF;
413   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
414   FieldDecl *LambdaThisCaptureField = nullptr;
415   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
InlinedOpenMPRegionRAII(CodeGenFunction & CGF,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel)
424       : CGF(CGF) {
425     // Start emission for the construct.
426     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
427         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
428     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
429     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
430     CGF.LambdaThisCaptureField = nullptr;
431     BlockInfo = CGF.BlockInfo;
432     CGF.BlockInfo = nullptr;
433   }
434 
~InlinedOpenMPRegionRAII()435   ~InlinedOpenMPRegionRAII() {
436     // Restore original CapturedStmtInfo only if we're done with code emission.
437     auto *OldCSI =
438         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
439     delete CGF.CapturedStmtInfo;
440     CGF.CapturedStmtInfo = OldCSI;
441     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
442     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
443     CGF.BlockInfo = BlockInfo;
444   }
445 };
446 
447 /// Values for bit flags used in the ident_t to describe the fields.
448 /// All enumeric elements are named and described in accordance with the code
449 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
450 enum OpenMPLocationFlags : unsigned {
451   /// Use trampoline for internal microtask.
452   OMP_IDENT_IMD = 0x01,
453   /// Use c-style ident structure.
454   OMP_IDENT_KMPC = 0x02,
455   /// Atomic reduction option for kmpc_reduce.
456   OMP_ATOMIC_REDUCE = 0x10,
457   /// Explicit 'barrier' directive.
458   OMP_IDENT_BARRIER_EXPL = 0x20,
459   /// Implicit barrier in code.
460   OMP_IDENT_BARRIER_IMPL = 0x40,
461   /// Implicit barrier in 'for' directive.
462   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
463   /// Implicit barrier in 'sections' directive.
464   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
465   /// Implicit barrier in 'single' directive.
466   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
467   /// Call of __kmp_for_static_init for static loop.
468   OMP_IDENT_WORK_LOOP = 0x200,
469   /// Call of __kmp_for_static_init for sections.
470   OMP_IDENT_WORK_SECTIONS = 0x400,
471   /// Call of __kmp_for_static_init for distribute.
472   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
473   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
474 };
475 
476 namespace {
477 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
478 /// Values for bit flags for marking which requires clauses have been used.
479 enum OpenMPOffloadingRequiresDirFlags : int64_t {
480   /// flag undefined.
481   OMP_REQ_UNDEFINED               = 0x000,
482   /// no requires clause present.
483   OMP_REQ_NONE                    = 0x001,
484   /// reverse_offload clause.
485   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
486   /// unified_address clause.
487   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
488   /// unified_shared_memory clause.
489   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
490   /// dynamic_allocators clause.
491   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
492   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
493 };
494 
495 enum OpenMPOffloadingReservedDeviceIDs {
496   /// Device ID if the device was not defined, runtime should get it
497   /// from environment variables in the spec.
498   OMP_DEVICEID_UNDEF = -1,
499 };
500 } // anonymous namespace
501 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
542 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
574 
575 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
576 /// region.
577 class CleanupTy final : public EHScopeStack::Cleanup {
578   PrePostActionTy *Action;
579 
580 public:
CleanupTy(PrePostActionTy * Action)581   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
Emit(CodeGenFunction & CGF,Flags)582   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
583     if (!CGF.HaveInsertPoint())
584       return;
585     Action->Exit(CGF);
586   }
587 };
588 
589 } // anonymous namespace
590 
operator ()(CodeGenFunction & CGF) const591 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
592   CodeGenFunction::RunCleanupsScope Scope(CGF);
593   if (PrePostAction) {
594     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
595     Callback(CodeGen, CGF, *PrePostAction);
596   } else {
597     PrePostActionTy Action;
598     Callback(CodeGen, CGF, Action);
599   }
600 }
601 
602 /// Check if the combiner is a call to UDR combiner and if it is so return the
603 /// UDR decl used for reduction.
604 static const OMPDeclareReductionDecl *
getReductionInit(const Expr * ReductionOp)605 getReductionInit(const Expr *ReductionOp) {
606   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
607     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
608       if (const auto *DRE =
609               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
610         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
611           return DRD;
612   return nullptr;
613 }
614 
emitInitWithReductionInitializer(CodeGenFunction & CGF,const OMPDeclareReductionDecl * DRD,const Expr * InitOp,Address Private,Address Original,QualType Ty)615 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
616                                              const OMPDeclareReductionDecl *DRD,
617                                              const Expr *InitOp,
618                                              Address Private, Address Original,
619                                              QualType Ty) {
620   if (DRD->getInitializer()) {
621     std::pair<llvm::Function *, llvm::Function *> Reduction =
622         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
623     const auto *CE = cast<CallExpr>(InitOp);
624     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
625     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
626     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
627     const auto *LHSDRE =
628         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
629     const auto *RHSDRE =
630         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
631     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
632     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
633                             [=]() { return Private; });
634     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
635                             [=]() { return Original; });
636     (void)PrivateScope.Privatize();
637     RValue Func = RValue::get(Reduction.second);
638     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
639     CGF.EmitIgnoredExpr(InitOp);
640   } else {
641     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
642     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
643     auto *GV = new llvm::GlobalVariable(
644         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
645         llvm::GlobalValue::PrivateLinkage, Init, Name);
646     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
647     RValue InitRVal;
648     switch (CGF.getEvaluationKind(Ty)) {
649     case TEK_Scalar:
650       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
651       break;
652     case TEK_Complex:
653       InitRVal =
654           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
655       break;
656     case TEK_Aggregate:
657       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
658       break;
659     }
660     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
661     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
662     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                          /*IsInitializer=*/false);
664   }
665 }
666 
667 /// Emit initialization of arrays of complex types.
668 /// \param DestAddr Address of the array.
669 /// \param Type Type of array.
670 /// \param Init Initial expression of array.
671 /// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    // SrcAddr is only used for user-defined reductions, whose initializer may
    // read the original (shared) element.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  // Skip the loop entirely for zero-length arrays.
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointer across
  // loop iterations; the back-edge incoming values are added further down.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope any cleanups produced by the per-element initializer so they run
    // once per element, inside the loop body.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
755 
emitSharedLValue(CodeGenFunction & CGF,const Expr * E)756 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
757   return CGF.EmitOMPSharedLValue(E);
758 }
759 
emitSharedLValueUB(CodeGenFunction & CGF,const Expr * E)760 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
761                                             const Expr *E) {
762   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
763     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
764   return LValue();
765 }
766 
emitAggregateInitialization(CodeGenFunction & CGF,unsigned N,Address PrivateAddr,LValue SharedLVal,const OMPDeclareReductionDecl * DRD)767 void ReductionCodeGen::emitAggregateInitialization(
768     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
769     const OMPDeclareReductionDecl *DRD) {
770   // Emit VarDecl with copy init for arrays.
771   // Get the address of the original variable captured in current
772   // captured region.
773   const auto *PrivateVD =
774       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
775   bool EmitDeclareReductionInit =
776       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
777   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
778                        EmitDeclareReductionInit,
779                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
780                                                 : PrivateVD->getInit(),
781                        DRD, SharedLVal.getAddress(CGF));
782 }
783 
ReductionCodeGen(ArrayRef<const Expr * > Shareds,ArrayRef<const Expr * > Origs,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > ReductionOps)784 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
785                                    ArrayRef<const Expr *> Origs,
786                                    ArrayRef<const Expr *> Privates,
787                                    ArrayRef<const Expr *> ReductionOps) {
788   ClausesData.reserve(Shareds.size());
789   SharedAddresses.reserve(Shareds.size());
790   Sizes.reserve(Shareds.size());
791   BaseDecls.reserve(Shareds.size());
792   const auto *IOrig = Origs.begin();
793   const auto *IPriv = Privates.begin();
794   const auto *IRed = ReductionOps.begin();
795   for (const Expr *Ref : Shareds) {
796     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
797     std::advance(IOrig, 1);
798     std::advance(IPriv, 1);
799     std::advance(IRed, 1);
800   }
801 }
802 
emitSharedOrigLValue(CodeGenFunction & CGF,unsigned N)803 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
804   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
805          "Number of generated lvalues must be exactly N.");
806   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
807   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
808   SharedAddresses.emplace_back(First, Second);
809   if (ClausesData[N].Shared == ClausesData[N].Ref) {
810     OrigAddresses.emplace_back(First, Second);
811   } else {
812     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
813     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
814     OrigAddresses.emplace_back(First, Second);
815   }
816 }
817 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  // Compute and record in Sizes[N] the byte size of reduction item N and, for
  // variably-sized items, the element count; then pre-emit the private VLA
  // type with that count bound to its size expression.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically-sized item: size is known from the type; no runtime element
    // count is needed (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB - LB) + 1 (bounds are inclusive);
    // byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: the byte size comes from the type; derive the element count by
    // exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's size expression to the computed count so the private type
  // is emitted with the correct dimensions.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
854 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  // Variant of emitAggregateType(CGF, N) that takes a caller-provided element
  // count instead of recomputing it.  For non-variably-modified items, Size
  // must be null and nothing is emitted.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA's size expression to Size before emitting the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
873 
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  // Initialize the private copy of reduction item N.  Precedence:
  //   1. element-wise init for array types,
  //   2. user-defined reduction initializer,
  //   3. the caller's DefaultInit hook,
  //   4. the private variable's own (non-trivial) initializer.
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // User-defined reduction initializer (also used when the private copy has
    // no default initializer of its own).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit declined; fall back to the variable's own initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
904 
needCleanups(unsigned N)905 bool ReductionCodeGen::needCleanups(unsigned N) {
906   const auto *PrivateVD =
907       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
908   QualType PrivateType = PrivateVD->getType();
909   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
910   return DTorKind != QualType::DK_none;
911 }
912 
emitCleanups(CodeGenFunction & CGF,unsigned N,Address PrivateAddr)913 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
914                                     Address PrivateAddr) {
915   const auto *PrivateVD =
916       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917   QualType PrivateType = PrivateVD->getType();
918   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919   if (needCleanups(N)) {
920     PrivateAddr = CGF.Builder.CreateElementBitCast(
921         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
922     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
923   }
924 }
925 
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  // Follow the pointer/reference indirections in BaseTy, loading through each
  // level, until the current type matches ElTy; then return an lvalue at the
  // resulting address cast to ElTy's memory type.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: wrap the current address as a reference lvalue and
      // load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Preserve the base info/TBAA of the innermost lvalue reached.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
945 
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Inverse of loadToBegin: rebuild BaseTy's pointer/reference indirection
  // chain around Addr by allocating one temporary per indirection level and
  // linking them with stores.  Returns the outermost temporary, or Addr itself
  // (cast to BaseLVType) when BaseTy has no indirections.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link this level into the previously created one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      // First level created: this is the address handed back to the caller.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
973 
getBaseDecl(const Expr * Ref,const DeclRefExpr * & DE)974 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
975   const VarDecl *OrigVD = nullptr;
976   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
977     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
978     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
979       Base = TempOASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
985     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   }
991   return OrigVD;
992 }
993 
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // For reduction items that are array sections/subscripts, the private copy
  // covers only the section while indexing in the region goes through the
  // declared base variable.  Offset the private address backwards by the
  // section's start offset so that base-relative indexing lands on the private
  // storage.  For plain variable references, return PrivateAddr unchanged.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Distance (in elements) from the array base to the section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Rebuild the base variable's indirection chain around the adjusted
    // pointer.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1019 
usesReductionInitializer(unsigned N) const1020 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1021   const OMPDeclareReductionDecl *DRD =
1022       getReductionInit(ClausesData[N].ReductionOp);
1023   return DRD && DRD->getInitializer();
1024 }
1025 
getThreadIDVariableLValue(CodeGenFunction & CGF)1026 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1027   return CGF.EmitLoadOfPointerLValue(
1028       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1029       getThreadIDVariable()->getType()->castAs<PointerType>());
1030 }
1031 
EmitBody(CodeGenFunction & CGF,const Stmt *)1032 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1033   if (!CGF.HaveInsertPoint())
1034     return;
1035   // 1.2.2 OpenMP Language Terminology
1036   // Structured block - An executable statement with a single entry at the
1037   // top and a single exit at the bottom.
1038   // The point of exit cannot be a branch out of the structured block.
1039   // longjmp() and throw() must not violate the entry/exit criteria.
1040   CGF.EHStack.pushTerminate();
1041   CodeGen(CGF);
1042   CGF.EHStack.popTerminate();
1043 }
1044 
getThreadIDVariableLValue(CodeGenFunction & CGF)1045 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1046     CodeGenFunction &CGF) {
1047   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1048                             getThreadIDVariable()->getType(),
1049                             AlignmentSource::Decl);
1050 }
1051 
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1052 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1053                                        QualType FieldTy) {
1054   auto *Field = FieldDecl::Create(
1055       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1056       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1057       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1058   Field->setAccess(AS_public);
1059   DC->addDecl(Field);
1060   return Field;
1061 }
1062 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Build the implicit 'ident_t' record used by the OpenMP runtime to carry
  // source-location information, and cache its AST and IR types.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Type of the name argument passed to critical-section runtime entries.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Pick up any offloading entry metadata already present in the module.
  loadOffloadInfoMetadata();
}
1090 
clear()1091 void CGOpenMPRuntime::clear() {
1092   InternalVars.clear();
1093   // Clean non-target variable declarations possibly used only in debug info.
1094   for (const auto &Data : EmittedNonTargetVariables) {
1095     if (!Data.getValue().pointsToAliveValue())
1096       continue;
1097     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098     if (!GV)
1099       continue;
1100     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101       continue;
1102     GV->eraseFromParent();
1103   }
1104 }
1105 
getName(ArrayRef<StringRef> Parts) const1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107   SmallString<128> Buffer;
1108   llvm::raw_svector_ostream OS(Buffer);
1109   StringRef Sep = FirstSeparator;
1110   for (StringRef Part : Parts) {
1111     OS << Sep << Part;
1112     Sep = Separator;
1113   }
1114   return std::string(OS.str());
1115 }
1116 
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // Emit the combiner or initializer function for a user-defined reduction:
  //   void .omp_combiner.(Ty *omp_out, Ty *omp_in)   (IsCombiner == true)
  //   void .omp_initializer.(Ty *omp_orig, Ty *omp_priv)  otherwise.
  // In/Out are the declaration's omp_in/omp_out (or omp_orig/omp_priv) vars;
  // they are mapped onto the two pointer parameters inside the body.

  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Initializer function without an explicit 'initializer' expression:
    // default-initialize omp_priv from its own initializer.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173 
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emit (once per declaration) the combiner and optional initializer
  // functions for a 'declare reduction' construct, caching them in UDRMap.
  // When called while emitting a function (CGF != null), also record the
  // declaration against that function in FunctionUDRMap.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct-init ('initializer(omp_priv = expr)' style handled via the
    // private variable's own init), pass no expression to the emitter.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1199 
1200 std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl * D)1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202   auto I = UDRMap.find(D);
1203   if (I != UDRMap.end())
1204     return I->second;
1205   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206   return UDRMap.lookup(D);
1207 }
1208 
1209 namespace {
1210 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1211 // Builder if one is present.
struct PushAndPopStackRAII {
  /// If an OpenMPIRBuilder is supplied, push a finalization callback that
  /// routes cancellation branches through clang's cleanup machinery for the
  /// lifetime of this object; a null builder makes this a no-op.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move clang's insert point to IP to emit the branch, then
      // restore it (guard does the restore on scope exit).
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pop the callback pushed by the constructor (no-op if none was pushed).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no builder was provided; also the "did we push" flag.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
1253 } // namespace
1254 
emitParallelOrTeamsOutlinedFunction(CodeGenModule & CGM,const OMPExecutableDirective & D,const CapturedStmt * CS,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const StringRef OutlinedHelperName,const RegionCodeGenTy & CodeGen)1255 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1256     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1257     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1258     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1259   assert(ThreadIDVar->getType()->isPointerType() &&
1260          "thread id variable must be of type kmp_int32 *");
1261   CodeGenFunction CGF(CGM, true);
1262   bool HasCancel = false;
1263   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1266     HasCancel = OPD->hasCancel();
1267   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1268     HasCancel = OPSD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1274     HasCancel = OPFD->hasCancel();
1275   else if (const auto *OPFD =
1276                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1277     HasCancel = OPFD->hasCancel();
1278   else if (const auto *OPFD =
1279                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1280     HasCancel = OPFD->hasCancel();
1281 
1282   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1283   //       parallel region to make cancellation barriers work properly.
1284   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1285   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1286   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1287                                     HasCancel, OutlinedHelperName);
1288   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1289   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1290 }
1291 
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1292 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1293     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1294     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1295   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1296   return emitParallelOrTeamsOutlinedFunction(
1297       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1298 }
1299 
emitTeamsOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1300 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1301     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1302     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1303   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1304   return emitParallelOrTeamsOutlinedFunction(
1305       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1306 }
1307 
/// Outline the body of a task(-loop) directive into a function callable by the
/// OpenMP runtime. For untied tasks, each part re-schedules the task by
/// calling __kmpc_omp_task with the task descriptor loaded from TaskTVar.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code run between the parts of an untied task: re-enqueue the task via
  // __kmpc_omp_task(loc, gtid, task_t*).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  // The action must be attached to CodeGen before the captured statement is
  // emitted so part boundaries are handled for untied tasks.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Task loops capture their statement under OMPD_taskloop; all other task
  // forms use the OMPD_task captured region.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only these directive kinds can report a 'cancel' inside the region.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks are emitted as several parts; tell the caller how many.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1354 
buildStructValue(ConstantStructBuilder & Fields,CodeGenModule & CGM,const RecordDecl * RD,const CGRecordLayout & RL,ArrayRef<llvm::Constant * > Data)1355 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1356                              const RecordDecl *RD, const CGRecordLayout &RL,
1357                              ArrayRef<llvm::Constant *> Data) {
1358   llvm::StructType *StructTy = RL.getLLVMType();
1359   unsigned PrevIdx = 0;
1360   ConstantInitBuilder CIBuilder(CGM);
1361   auto DI = Data.begin();
1362   for (const FieldDecl *FD : RD->fields()) {
1363     unsigned Idx = RL.getLLVMFieldNo(FD);
1364     // Fill the alignment.
1365     for (unsigned I = PrevIdx; I < Idx; ++I)
1366       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1367     PrevIdx = Idx + 1;
1368     Fields.add(*DI);
1369     ++DI;
1370   }
1371 }
1372 
1373 template <class... As>
1374 static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule & CGM,QualType Ty,bool IsConstant,ArrayRef<llvm::Constant * > Data,const Twine & Name,As &&...Args)1375 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1376                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1377                    As &&... Args) {
1378   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1379   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1380   ConstantInitBuilder CIBuilder(CGM);
1381   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1382   buildStructValue(Fields, CGM, RD, RL, Data);
1383   return Fields.finishAndCreateGlobal(
1384       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1385       std::forward<As>(Args)...);
1386 }
1387 
1388 template <typename T>
1389 static void
createConstantGlobalStructAndAddToParent(CodeGenModule & CGM,QualType Ty,ArrayRef<llvm::Constant * > Data,T & Parent)1390 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1391                                          ArrayRef<llvm::Constant *> Data,
1392                                          T &Parent) {
1393   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1394   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1395   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1396   buildStructValue(Fields, CGM, RD, RL, Data);
1397   Fields.finishAndAddTo(Parent);
1398 }
1399 
/// Return (creating on first use) the default ident_t global for the given
/// flags. One global is cached per (flags, reserved_2) pair in
/// OpenMPDefaultLocMap.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order follows IdentQTy: three i32 slots (the second holds Flags,
    // the third Reserved2Flags), a fourth i32, then the psource string.
    // NOTE(review): names assumed to mirror libomp's ident_t — verify kmp.h.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // Address identity of this global is irrelevant; allow merging.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1432 
setLocThreadIdInsertPt(CodeGenFunction & CGF,bool AtCurrentPoint)1433 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1434                                              bool AtCurrentPoint) {
1435   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1436   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1437 
1438   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1439   if (AtCurrentPoint) {
1440     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1441         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1442   } else {
1443     Elem.second.ServiceInsertPt =
1444         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1445     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1446   }
1447 }
1448 
clearLocThreadIdInsertPt(CodeGenFunction & CGF)1449 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1450   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1451   if (Elem.second.ServiceInsertPt) {
1452     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1453     Elem.second.ServiceInsertPt = nullptr;
1454     Ptr->eraseFromParent();
1455   }
1456 }
1457 
/// Build (or reuse) an ident_t* describing \p Loc for passing as the 'loc'
/// argument of OpenMP runtime entry points. Without debug info (or with an
/// invalid location) this is a shared constant global; otherwise a
/// per-function local copy whose psource field is updated per call site.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the local copy from the default ident_t global at the
    // function's service insertion point (set up lazily).
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Lazily build and cache the location string for this raw source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1518 
/// Get the OpenMP global thread id for the current function. Prefers, in
/// order: a value cached in OpenMPLocThreadIDMap, a load of the outlined
/// region's thread-id argument (when safe w.r.t. exception handling), and
/// finally an emitted call to __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the argument when no C++ EH landing pad is required, or
      // when the load happens in the entry block or in the block holding the
      // thread-id variable itself.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1576 
functionFinished(CodeGenFunction & CGF)1577 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1578   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1579   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1580     clearLocThreadIdInsertPt(CGF);
1581     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1582   }
1583   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1584     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1585       UDRMap.erase(D);
1586     FunctionUDRMap.erase(CGF.CurFn);
1587   }
1588   auto I = FunctionUDMMap.find(CGF.CurFn);
1589   if (I != FunctionUDMMap.end()) {
1590     for(const auto *D : I->second)
1591       UDMMap.erase(D);
1592     FunctionUDMMap.erase(I);
1593   }
1594   LastprivateConditionalToTypes.erase(CGF.CurFn);
1595 }
1596 
/// Return the LLVM type ident_t* (the 'loc' parameter type of __kmpc entry
/// points). getPointerTo() with no argument uses address space 0; see the
/// XXXAR TODO at the top of this file about default address spaces.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}
1600 
/// Return a pointer to the 'kmpc_micro' outlined-function type:
/// void(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...). The function type
/// is built lazily and cached in Kmpc_MicroTy.
/// NOTE(review): the file-level 'getUnqual' macro rewrites these calls to
/// PointerType::get(..., 0u), i.e. address space 0.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1610 
1611 llvm::FunctionCallee
createForStaticInitFunction(unsigned IVSize,bool IVSigned)1612 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1616                                             : "__kmpc_for_static_init_4u")
1617                                 : (IVSigned ? "__kmpc_for_static_init_8"
1618                                             : "__kmpc_for_static_init_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     CGM.Int32Ty,                               // schedtype
1625     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1626     PtrTy,                                     // p_lower
1627     PtrTy,                                     // p_upper
1628     PtrTy,                                     // p_stride
1629     ITy,                                       // incr
1630     ITy                                        // chunk
1631   };
1632   auto *FnTy =
1633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1634   return CGM.CreateRuntimeFunction(FnTy, Name);
1635 }
1636 
1637 llvm::FunctionCallee
createDispatchInitFunction(unsigned IVSize,bool IVSigned)1638 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1639   assert((IVSize == 32 || IVSize == 64) &&
1640          "IV size is not compatible with the omp runtime");
1641   StringRef Name =
1642       IVSize == 32
1643           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1644           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1645   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1646   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1647                                CGM.Int32Ty,           // tid
1648                                CGM.Int32Ty,           // schedtype
1649                                ITy,                   // lower
1650                                ITy,                   // upper
1651                                ITy,                   // stride
1652                                ITy                    // chunk
1653   };
1654   auto *FnTy =
1655       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1656   return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
1659 llvm::FunctionCallee
createDispatchFiniFunction(unsigned IVSize,bool IVSigned)1660 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1661   assert((IVSize == 32 || IVSize == 64) &&
1662          "IV size is not compatible with the omp runtime");
1663   StringRef Name =
1664       IVSize == 32
1665           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1666           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1667   llvm::Type *TypeParams[] = {
1668       getIdentTyPointerTy(), // loc
1669       CGM.Int32Ty,           // tid
1670   };
1671   auto *FnTy =
1672       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1673   return CGM.CreateRuntimeFunction(FnTy, Name);
1674 }
1675 
1676 llvm::FunctionCallee
createDispatchNextFunction(unsigned IVSize,bool IVSigned)1677 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1678   assert((IVSize == 32 || IVSize == 64) &&
1679          "IV size is not compatible with the omp runtime");
1680   StringRef Name =
1681       IVSize == 32
1682           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1683           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1684   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1685   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1686   llvm::Type *TypeParams[] = {
1687     getIdentTyPointerTy(),                     // loc
1688     CGM.Int32Ty,                               // tid
1689     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1690     PtrTy,                                     // p_lower
1691     PtrTy,                                     // p_upper
1692     PtrTy                                      // p_stride
1693   };
1694   auto *FnTy =
1695       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1696   return CGM.CreateRuntimeFunction(FnTy, Name);
1697 }
1698 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  // NOTE(review): if getUniqueID fails, a diagnostic is reported but ID is
  // still read below — presumably in its default-constructed state; confirm.
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1724 
/// For a 'declare target link' variable (or a 'to' variable under unified
/// shared memory), return the address of the generated reference pointer
/// "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr"; otherwise return an invalid
/// Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // -fopenmp-simd does no target offloading; nothing to reference.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables mix the file ID into the name so the
        // reference pointer stays unique across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the pointer variable and register it with the
      // offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the reference pointer is initialized with the variable's
      // address; on the device no initializer is set here.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1763 
1764 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1765 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1766   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1767          !CGM.getContext().getTargetInfo().isTLSSupported());
1768   // Lookup the entry, lazily creating it if necessary.
1769   std::string Suffix = getName({"cache", ""});
1770   return getOrCreateInternalVariable(
1771       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1772 }
1773 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1774 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1775                                                 const VarDecl *VD,
1776                                                 Address VDAddr,
1777                                                 SourceLocation Loc) {
1778   if (CGM.getLangOpts().OpenMPUseTLS &&
1779       CGM.getContext().getTargetInfo().isTLSSupported())
1780     return VDAddr;
1781 
1782   llvm::Type *VarTy = VDAddr.getElementType();
1783   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1784                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1785                                                        CGM.Int8PtrTy),
1786                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1787                          getOrCreateThreadPrivateCache(VD)};
1788   return Address(CGF.EmitRuntimeCall(
1789                      OMPBuilder.getOrCreateRuntimeFunction(
1790                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1791                      Args),
1792                  VDAddr.getAlignment());
1793 }
1794 
/// Register the ctor/copy-ctor/dtor triple for a threadprivate variable with
/// the OpenMP runtime via __kmpc_threadprivate_register, after making sure the
/// runtime is initialized.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
1814 
/// Emit the per-variable helper functions needed for a threadprivate
/// definition: an optional constructor that re-runs the initializer into the
/// thread's copy, an optional destructor, and — when no CodeGenFunction is
/// supplied — a standalone init function that registers them with the runtime.
/// Returns that init function, or nullptr when registration was emitted
/// inline (or none was needed).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS, no runtime registration is required at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only the defining declaration is processed, and only once per mangled
  // name (ThreadPrivateWithDefinition deduplicates).
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is assumed non-null whenever PerformInit is true in
    // C++ mode — confirm with callers before relying on it.
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The runtime passes the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo(DefaultAS);
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed as typed null function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo(DefaultAS);
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo(DefaultAS);
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in its own global init
      // function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise register inline in the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1935 
/// Emit host/device bookkeeping for a 'declare target' variable definition.
/// On the device this emits (and registers as offload entries) hidden
/// ctor/dtor functions that initialize/destroy the device copy of \p VD; on
/// the host it emits named placeholder globals so both sides produce matching
/// offload-entry tables. Returns true iff compiling for the device, i.e. the
/// caller must not also emit the regular host-side initialization.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when there are no offload targets and this is not a device
  // compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' variables under unified shared memory) get no
  // per-variable ctor/dtor entries.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: nothing in the module references it directly,
      // the runtime reaches it only through the offload entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: only a named placeholder is needed so the entry table
      // layout matches the device image.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Same as the ctor: referenced only via the offload entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2050 
/// Return the address of an "artificial" threadprivate variable: a
/// compiler-generated per-thread value identified by \p Name rather than by a
/// user VarDecl. Backed either by a TLS global or by the
/// __kmpc_threadprivate_cached runtime entry point.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Fast path: when TLS can be used, mark the internal global thread-local
  // and use it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise ask the runtime for the per-thread copy; an extra internal
  // "cache" global is passed for the runtime's memoization.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime call returns void*; cast it back to a pointer to the
  // variable's memory type before wrapping it in an Address.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2081 
emitIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)2082 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2083                                    const RegionCodeGenTy &ThenGen,
2084                                    const RegionCodeGenTy &ElseGen) {
2085   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2086 
2087   // If the condition constant folds and can be elided, try to avoid emitting
2088   // the condition and the dead arm of the if/else.
2089   bool CondConstant;
2090   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2091     if (CondConstant)
2092       ThenGen(CGF);
2093     else
2094       ElseGen(CGF);
2095     return;
2096   }
2097 
2098   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2099   // emit the conditional branch.
2100   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2101   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2102   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2103   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2104 
2105   // Emit the 'then' code.
2106   CGF.EmitBlock(ThenBlock);
2107   ThenGen(CGF);
2108   CGF.EmitBranch(ContBlock);
2109   // Emit the 'else' code if present.
2110   // There is no need to emit line number for unconditional branch.
2111   (void)ApplyDebugLocation::CreateEmpty(CGF);
2112   CGF.EmitBlock(ElseBlock);
2113   ElseGen(CGF);
2114   // There is no need to emit line number for unconditional branch.
2115   (void)ApplyDebugLocation::CreateEmpty(CGF);
2116   CGF.EmitBranch(ContBlock);
2117   // Emit the continuation block for code after the if.
2118   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2119 }
2120 
/// Emit code for an OpenMP 'parallel' region: either a __kmpc_fork_call that
/// spawns the team, or -- when \p IfCond is present and evaluates to false --
/// a serialized execution of \p OutlinedFn on the current thread.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: fork a team running the outlined microtask.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serial path: run the outlined function inline, bracketed by
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause, select the path at runtime; otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2182 
2183 // If we're inside an (outlined) parallel region, use the region info's
2184 // thread-ID variable (it is passed in a first argument of the outlined function
2185 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2186 // regular serial code region, get thread ID by calling kmp_int32
2187 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2188 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)2189 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2190                                              SourceLocation Loc) {
2191   if (auto *OMPRegionInfo =
2192           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2193     if (OMPRegionInfo->getThreadIDVariable())
2194       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2195 
2196   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2197   QualType Int32Ty =
2198       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2199   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2200   CGF.EmitStoreOfScalar(ThreadID,
2201                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2202 
2203   return ThreadIDTemp;
2204 }
2205 
getOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name,unsigned AddressSpace)2206 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2207     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2208   SmallString<256> Buffer;
2209   llvm::raw_svector_ostream Out(Buffer);
2210   Out << Name;
2211   StringRef RuntimeName = Out.str();
2212   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2213   if (Elem.second) {
2214     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2215            "OMP internal variable has different type than requested");
2216     return &*Elem.second;
2217   }
2218 
2219   return Elem.second = new llvm::GlobalVariable(
2220              CGM.getModule(), Ty, /*IsConstant*/ false,
2221              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2222              Elem.first(), /*InsertBefore=*/nullptr,
2223              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2224 }
2225 
getCriticalRegionLock(StringRef CriticalName)2226 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2227   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2228   std::string Name = getName({Prefix, "var"});
2229   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2230 }
2231 
2232 namespace {
2233 /// Common pre(post)-action for different OpenMP constructs.
2234 class CommonActionTy final : public PrePostActionTy {
2235   llvm::FunctionCallee EnterCallee;
2236   ArrayRef<llvm::Value *> EnterArgs;
2237   llvm::FunctionCallee ExitCallee;
2238   ArrayRef<llvm::Value *> ExitArgs;
2239   bool Conditional;
2240   llvm::BasicBlock *ContBlock = nullptr;
2241 
2242 public:
CommonActionTy(llvm::FunctionCallee EnterCallee,ArrayRef<llvm::Value * > EnterArgs,llvm::FunctionCallee ExitCallee,ArrayRef<llvm::Value * > ExitArgs,bool Conditional=false)2243   CommonActionTy(llvm::FunctionCallee EnterCallee,
2244                  ArrayRef<llvm::Value *> EnterArgs,
2245                  llvm::FunctionCallee ExitCallee,
2246                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2247       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2248         ExitArgs(ExitArgs), Conditional(Conditional) {}
Enter(CodeGenFunction & CGF)2249   void Enter(CodeGenFunction &CGF) override {
2250     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2251     if (Conditional) {
2252       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2253       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2254       ContBlock = CGF.createBasicBlock("omp_if.end");
2255       // Generate the branch (If-stmt)
2256       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2257       CGF.EmitBlock(ThenBlock);
2258     }
2259   }
Done(CodeGenFunction & CGF)2260   void Done(CodeGenFunction &CGF) {
2261     // Emit the rest of blocks/branches
2262     CGF.EmitBranch(ContBlock);
2263     CGF.EmitBlock(ContBlock, true);
2264   }
Exit(CodeGenFunction & CGF)2265   void Exit(CodeGenFunction &CGF) override {
2266     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2267   }
2268 };
2269 } // anonymous namespace
2270 
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)2271 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2272                                          StringRef CriticalName,
2273                                          const RegionCodeGenTy &CriticalOpGen,
2274                                          SourceLocation Loc, const Expr *Hint) {
2275   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2276   // CriticalOpGen();
2277   // __kmpc_end_critical(ident_t *, gtid, Lock);
2278   // Prepare arguments and build a call to __kmpc_critical
2279   if (!CGF.HaveInsertPoint())
2280     return;
2281   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2282                          getCriticalRegionLock(CriticalName)};
2283   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2284                                                 std::end(Args));
2285   if (Hint) {
2286     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2287         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2288   }
2289   CommonActionTy Action(
2290       OMPBuilder.getOrCreateRuntimeFunction(
2291           CGM.getModule(),
2292           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2293       EnterArgs,
2294       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2295                                             OMPRTL___kmpc_end_critical),
2296       Args);
2297   CriticalOpGen.setAction(Action);
2298   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2299 }
2300 
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)2301 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2302                                        const RegionCodeGenTy &MasterOpGen,
2303                                        SourceLocation Loc) {
2304   if (!CGF.HaveInsertPoint())
2305     return;
2306   // if(__kmpc_master(ident_t *, gtid)) {
2307   //   MasterOpGen();
2308   //   __kmpc_end_master(ident_t *, gtid);
2309   // }
2310   // Prepare arguments and build a call to __kmpc_master
2311   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2312   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2313                             CGM.getModule(), OMPRTL___kmpc_master),
2314                         Args,
2315                         OMPBuilder.getOrCreateRuntimeFunction(
2316                             CGM.getModule(), OMPRTL___kmpc_end_master),
2317                         Args,
2318                         /*Conditional=*/true);
2319   MasterOpGen.setAction(Action);
2320   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2321   Action.Done(CGF);
2322 }
2323 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)2324 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2325                                         SourceLocation Loc) {
2326   if (!CGF.HaveInsertPoint())
2327     return;
2328   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2329     OMPBuilder.CreateTaskyield(CGF.Builder);
2330   } else {
2331     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2332     llvm::Value *Args[] = {
2333         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2334         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2335     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2336                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2337                         Args);
2338   }
2339 
2340   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2341     Region->emitUntiedSwitch(CGF);
2342 }
2343 
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)2344 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2345                                           const RegionCodeGenTy &TaskgroupOpGen,
2346                                           SourceLocation Loc) {
2347   if (!CGF.HaveInsertPoint())
2348     return;
2349   // __kmpc_taskgroup(ident_t *, gtid);
2350   // TaskgroupOpGen();
2351   // __kmpc_end_taskgroup(ident_t *, gtid);
2352   // Prepare arguments and build a call to __kmpc_taskgroup
2353   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2354   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2355                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2356                         Args,
2357                         OMPBuilder.getOrCreateRuntimeFunction(
2358                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2359                         Args);
2360   TaskgroupOpGen.setAction(Action);
2361   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2362 }
2363 
2364 /// Given an array of pointers to variables, project the address of a
2365 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)2366 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2367                                       unsigned Index, const VarDecl *Var) {
2368   // Pull out the pointer to the variable.
2369   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2370   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2371 
2372   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2373   Addr = CGF.Builder.CreateElementBitCast(
2374       Addr, CGF.ConvertTypeForMem(Var->getType()));
2375   return Addr;
2376 }
2377 
/// Emit the copy helper passed to __kmpc_copyprivate: an internal function
/// 'void copy_func(void *LHSArg, void *RHSArg)' where both arguments point at
/// arrays of void* addressing the copyprivate variables. It performs the
/// per-variable assignments described by \p AssignmentOps, copying from the
/// RHS (single-executing thread) array into the LHS (receiving thread) array.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the raw void* parameters as pointers to the pointer array:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Use the copy/assignment expression provided by Sema for this variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2431 
/// Emit a 'single' region, including the copyprivate broadcast when the
/// clause is present. Generated shape:
///   int32 did_it = 0;
///   if (__kmpc_single(ident_t *, gtid)) {
///     SingleOpGen();
///     __kmpc_end_single(ident_t *, gtid);
///     did_it = 1;
///   }
///   __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
///                      <copy_func>, did_it);   // only with copyprivate
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();

  // did_it flags (only for copyprivate) whether this thread executed the
  // single region; __kmpc_copyprivate uses it to pick the broadcast source.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional, so only the executing
    // thread sets it)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(DefaultAS),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2520 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2522                                         const RegionCodeGenTy &OrderedOpGen,
2523                                         SourceLocation Loc, bool IsThreads) {
2524   if (!CGF.HaveInsertPoint())
2525     return;
2526   // __kmpc_ordered(ident_t *, gtid);
2527   // OrderedOpGen();
2528   // __kmpc_end_ordered(ident_t *, gtid);
2529   // Prepare arguments and build a call to __kmpc_ordered
2530   if (IsThreads) {
2531     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2532     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_ordered),
2534                           Args,
2535                           OMPBuilder.getOrCreateRuntimeFunction(
2536                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2537                           Args);
2538     OrderedOpGen.setAction(Action);
2539     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540     return;
2541   }
2542   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 }
2544 
getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2546   unsigned Flags;
2547   if (Kind == OMPD_for)
2548     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2549   else if (Kind == OMPD_sections)
2550     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2551   else if (Kind == OMPD_single)
2552     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2553   else if (Kind == OMPD_barrier)
2554     Flags = OMP_IDENT_BARRIER_EXPL;
2555   else
2556     Flags = OMP_IDENT_BARRIER_IMPL;
2557   return Flags;
2558 }
2559 
getDefaultScheduleAndChunk(CodeGenFunction & CGF,const OMPLoopDirective & S,OpenMPScheduleClauseKind & ScheduleKind,const Expr * & ChunkExpr) const2560 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2561     CodeGenFunction &CGF, const OMPLoopDirective &S,
2562     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2563   // Check if the loop directive is actually a doacross loop directive. In this
2564   // case choose static, 1 schedule.
2565   if (llvm::any_of(
2566           S.getClausesOfKind<OMPOrderedClause>(),
2567           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2568     ScheduleKind = OMPC_SCHEDULE_static;
2569     // Chunk size is 1 in this case.
2570     llvm::APInt ChunkSize(32, 1);
2571     ChunkExpr = IntegerLiteral::Create(
2572         CGF.getContext(), ChunkSize,
2573         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2574         SourceLocation());
2575   }
2576 }
2577 
/// Emit a barrier for the given directive kind.
/// \param Kind Directive the barrier belongs to; selects the ident_t flags.
/// \param EmitChecks If true, emit the cancellation check after a
///        cancellable barrier.
/// \param ForceSimpleCall If true, always emit the plain (non-cancellable)
///        __kmpc_barrier even inside a region that has cancel.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate the whole barrier emission to the OpenMPIRBuilder.
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // The enclosing region can be cancelled: use the cancellable barrier,
      // whose return value is non-zero when cancellation was requested.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2627 
2628 /// Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2630                                           bool Chunked, bool Ordered) {
2631   switch (ScheduleKind) {
2632   case OMPC_SCHEDULE_static:
2633     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2634                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2635   case OMPC_SCHEDULE_dynamic:
2636     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2637   case OMPC_SCHEDULE_guided:
2638     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2639   case OMPC_SCHEDULE_runtime:
2640     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2641   case OMPC_SCHEDULE_auto:
2642     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2643   case OMPC_SCHEDULE_unknown:
2644     assert(!Chunked && "chunk was specified but schedule kind not known");
2645     return Ordered ? OMP_ord_static : OMP_sch_static;
2646   }
2647   llvm_unreachable("Unexpected runtime schedule");
2648 }
2649 
2650 /// Map the OpenMP distribute schedule to the runtime enumeration.
2651 static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked)2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2653   // only static is allowed for dist_schedule
2654   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2658                                          bool Chunked) const {
2659   OpenMPSchedType Schedule =
2660       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661   return Schedule == OMP_sch_static;
2662 }
2663 
isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2664 bool CGOpenMPRuntime::isStaticNonchunked(
2665     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667   return Schedule == OMP_dist_sch_static;
2668 }
2669 
isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2671                                       bool Chunked) const {
2672   OpenMPSchedType Schedule =
2673       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674   return Schedule == OMP_sch_static_chunked;
2675 }
2676 
isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2677 bool CGOpenMPRuntime::isStaticChunked(
2678     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680   return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682 
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2684   OpenMPSchedType Schedule =
2685       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687   return Schedule != OMP_sch_static;
2688 }
2689 
addMonoNonMonoModifier(CodeGenModule & CGM,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2)2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2691                                   OpenMPScheduleClauseModifier M1,
2692                                   OpenMPScheduleClauseModifier M2) {
2693   int Modifier = 0;
2694   switch (M1) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   switch (M2) {
2710   case OMPC_SCHEDULE_MODIFIER_monotonic:
2711     Modifier = OMP_sch_modifier_monotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714     Modifier = OMP_sch_modifier_nonmonotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_simd:
2717     if (Schedule == OMP_sch_static_chunked)
2718       Schedule = OMP_sch_static_balanced_chunked;
2719     break;
2720   case OMPC_SCHEDULE_MODIFIER_last:
2721   case OMPC_SCHEDULE_MODIFIER_unknown:
2722     break;
2723   }
2724   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2725   // If the static schedule kind is specified or if the ordered clause is
2726   // specified, and if the nonmonotonic modifier is not specified, the effect is
2727   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728   // modifier is specified, the effect is as if the nonmonotonic modifier is
2729   // specified.
2730   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2731     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2732           Schedule == OMP_sch_static_balanced_chunked ||
2733           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2734           Schedule == OMP_dist_sch_static_chunked ||
2735           Schedule == OMP_dist_sch_static))
2736       Modifier = OMP_sch_modifier_nonmonotonic;
2737   }
2738   return Schedule | Modifier;
2739 }
2740 
/// Emit the dispatch-init runtime call that starts a dynamically scheduled
/// (or ordered) worksharing loop.
/// \param ScheduleKind Schedule kind plus its two optional modifiers.
/// \param IVSize Size in bits (32 or 64) of the iteration variable.
/// \param IVSigned Signedness of the iteration variable.
/// \param Ordered True when the loop has an 'ordered' clause.
/// \param DispatchValues Lower/upper bounds and optional chunk value.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Non-ordered static schedules must use the static-init interface
  // (emitForStaticInitCall), not the dispatch interface.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2773 
/// Emit the static-init runtime call shared by the worksharing-loop and
/// distribute codegen paths.
/// \param Schedule Must be one of the static schedule types (asserted below).
/// \param M1, M2 Schedule modifiers folded into the schedtype argument.
/// \param Values Loop descriptor: IV size/signedness, addresses of the
///        is-last-iter/lower/upper/stride variables and an optional chunk.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops are not handled here (see emitForDispatchInit).
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2822 
/// Emit the static initialization for a worksharing loop or sections
/// construct. The runtime init function is selected from
/// Values.IVSize/IVSigned; the ident_t flags mark the construct as a loop or
/// sections region.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the source location with the kind of worksharing construct.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // Use an artificial debug location for the runtime call.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2843 
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const CGOpenMPRuntime::StaticRTInput & Values)2844 void CGOpenMPRuntime::emitDistributeStaticInit(
2845     CodeGenFunction &CGF, SourceLocation Loc,
2846     OpenMPDistScheduleClauseKind SchedKind,
2847     const CGOpenMPRuntime::StaticRTInput &Values) {
2848   OpenMPSchedType ScheduleNum =
2849       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2850   llvm::Value *UpdatedLocation =
2851       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2852   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2853   llvm::FunctionCallee StaticInitFunction =
2854       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2855   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2856                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2857                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2858 }
2859 
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)2860 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2861                                           SourceLocation Loc,
2862                                           OpenMPDirectiveKind DKind) {
2863   if (!CGF.HaveInsertPoint())
2864     return;
2865   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2866   llvm::Value *Args[] = {
2867       emitUpdateLocation(CGF, Loc,
2868                          isOpenMPDistributeDirective(DKind)
2869                              ? OMP_IDENT_WORK_DISTRIBUTE
2870                              : isOpenMPLoopDirective(DKind)
2871                                    ? OMP_IDENT_WORK_LOOP
2872                                    : OMP_IDENT_WORK_SECTIONS),
2873       getThreadID(CGF, Loc)};
2874   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2875   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2876                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2877                       Args);
2878 }
2879 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)2880 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2881                                                  SourceLocation Loc,
2882                                                  unsigned IVSize,
2883                                                  bool IVSigned) {
2884   if (!CGF.HaveInsertPoint())
2885     return;
2886   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2887   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2888   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2889 }
2890 
/// Emit a call to the dispatch-next runtime function for a dynamically
/// scheduled loop and convert its kmp_int32 result to a boolean value
/// (non-zero means another chunk is available).
/// \param IL, LB, UB, ST Addresses the runtime writes the is-last-iteration
///        flag and the next chunk's lower/upper bounds and stride into.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // Convert the runtime's 32-bit int result to a bool value.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2914 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)2915 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2916                                            llvm::Value *NumThreads,
2917                                            SourceLocation Loc) {
2918   if (!CGF.HaveInsertPoint())
2919     return;
2920   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2921   llvm::Value *Args[] = {
2922       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2923       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2924   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2925                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2926                       Args);
2927 }
2928 
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)2929 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2930                                          ProcBindKind ProcBind,
2931                                          SourceLocation Loc) {
2932   if (!CGF.HaveInsertPoint())
2933     return;
2934   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2935   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2936   llvm::Value *Args[] = {
2937       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2938       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2939   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2940                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2941                       Args);
2942 }
2943 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc,llvm::AtomicOrdering AO)2944 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2945                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2946   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2947     OMPBuilder.CreateFlush(CGF.Builder);
2948   } else {
2949     if (!CGF.HaveInsertPoint())
2950       return;
2951     // Build call void __kmpc_flush(ident_t *loc)
2952     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2953                             CGM.getModule(), OMPRTL___kmpc_flush),
2954                         emitUpdateLocation(CGF, Loc));
2955   }
2956 }
2957 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these enumerator values are presumably used as field indices
/// into the kmp_task_t record built elsewhere in this file, so their order is
/// layout-sensitive — confirm against the task record construction before
/// reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2983 
empty() const2984 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2985   return OffloadEntriesTargetRegion.empty() &&
2986          OffloadEntriesDeviceGlobalVar.empty();
2987 }
2988 
/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) keyed by
/// device/file/parent-function/line; the real address and ID are filled in
/// later by registerTargetRegionEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3002 
/// Register a target region entry. On the device the entry must already have
/// been created by initializeTargetRegionEntryInfo and only its address, ID
/// and flags are filled in; on the host a fresh entry is created and given
/// the next ordinal number.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // Host and device translation units disagree: report instead of
      // silently emitting a mismatched entry table.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3030 
hasTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum) const3031 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3032     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3033     unsigned LineNum) const {
3034   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3035   if (PerDevice == OffloadEntriesTargetRegion.end())
3036     return false;
3037   auto PerFile = PerDevice->second.find(FileID);
3038   if (PerFile == PerDevice->second.end())
3039     return false;
3040   auto PerParentName = PerFile->second.find(ParentName);
3041   if (PerParentName == PerFile->second.end())
3042     return false;
3043   auto PerLine = PerParentName->second.find(LineNum);
3044   if (PerLine == PerParentName->second.end())
3045     return false;
3046   // Fail if this entry is already registered.
3047   if (PerLine->second.getAddress() || PerLine->second.getID())
3048     return false;
3049   return true;
3050 }
3051 
actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy & Action)3052 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3053     const OffloadTargetRegionEntryInfoActTy &Action) {
3054   // Scan all target region entries and perform the provided action.
3055   for (const auto &D : OffloadEntriesTargetRegion)
3056     for (const auto &F : D.second)
3057       for (const auto &P : F.second)
3058         for (const auto &L : P.second)
3059           Action(D.first, F.first, P.first(), L.first, L.second);
3060 }
3061 
/// Initialize a device global variable entry.
/// Creates a placeholder entry keyed by the variable name; its address, size
/// and linkage are filled in later by registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3072 
/// Register a device global variable entry. On the device side the entry was
/// created by initializeDeviceGlobalVarEntryInfo and is completed here; on
/// the host side it is created on first registration and updated afterwards.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address: only fill in a still-unknown size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Re-registration on the host: only fill in a still-unknown size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3112 
3113 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy & Action)3114     actOnDeviceGlobalVarEntriesInfo(
3115         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3116   // Scan all target region entries and perform the provided action.
3117   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3118     Action(E.getKey(), E.getValue());
3119 }
3120 
/// Emit one offload entry descriptor: a constant struct holding the entry
/// address (ID), a pointer to an internal copy of its name, its size and its
/// flags, placed in the "omp_offloading_entries" section so the linker can
/// collect the entry table.
/// NOTE(review): the Linkage parameter is unused in this body; the entry
/// global itself is always emitted with weak_any linkage below.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Struct fields: addr, name, size, flags, reserved (zero).
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3150 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected indexed by their creation order so host and device
  // compilations agree on the numbering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Try to recover a source location from the (DeviceID, FileID) pair
        // so later diagnostics can point at the original entry.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit the actual __tgt_offload_entry descriptors in creation order,
  // diagnosing entries that lack a valid address or ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory the device side does not emit 'to'
        // entries; the host copy is used directly.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only materialized on the host side.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3324 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a local context; only the named metadata node is
  // consumed, the module itself is discarded on return.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode integer and string operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands follow the layout
    // produced by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3393 
emitKmpRoutineEntryT(QualType KmpInt32Ty)3394 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3395   if (!KmpRoutineEntryPtrTy) {
3396     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3397     ASTContext &C = CGM.getContext();
3398     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3399     FunctionProtoType::ExtProtoInfo EPI;
3400     KmpRoutineEntryPtrQTy = C.getPointerType(
3401         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3402     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3403   }
3404 }
3405 
getTgtOffloadEntryQTy()3406 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3407   // Make sure the type of the entry is already created. This is the type we
3408   // have to create:
3409   // struct __tgt_offload_entry{
3410   //   void      *addr;       // Pointer to the offload entry info.
3411   //                          // (function or global)
3412   //   char      *name;       // Name of the function or global.
3413   //   size_t     size;       // Size of the entry info (0 if it a function).
3414   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3415   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3416   // };
3417   if (TgtOffloadEntryQTy.isNull()) {
3418     ASTContext &C = CGM.getContext();
3419     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3420     RD->startDefinition();
3421     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3422     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3423     addFieldToRecordDecl(C, RD, C.getSizeType());
3424     addFieldToRecordDecl(
3425         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3426     addFieldToRecordDecl(
3427         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3428     RD->completeDefinition();
3429     RD->addAttr(PackedAttr::CreateImplicit(C));
3430     TgtOffloadEntryQTy = C.getRecordType(RD);
3431   }
3432   return TgtOffloadEntryQTy;
3433 }
3434 
namespace {
/// Bundles the AST nodes describing one privatized variable of a task-based
/// directive: the referencing expression, the original variable, the private
/// copy, and the initializer for the private element (may be null).
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
};
/// A privatized variable paired with its required alignment, used to order
/// fields of the generated privates record.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3448 
3449 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)3450 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3451   if (!Privates.empty()) {
3452     ASTContext &C = CGM.getContext();
3453     // Build struct .kmp_privates_t. {
3454     //         /*  private vars  */
3455     //       };
3456     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3457     RD->startDefinition();
3458     for (const auto &Pair : Privates) {
3459       const VarDecl *VD = Pair.second.Original;
3460       QualType Type = VD->getType().getNonReferenceType();
3461       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3462       if (VD->hasAttrs()) {
3463         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3464              E(VD->getAttrs().end());
3465              I != E; ++I)
3466           FD->addAttr(*I);
3467       }
3468     }
3469     RD->completeDefinition();
3470     return RD;
3471   }
3472   return nullptr;
3473 }
3474 
/// Build the implicit record type mirroring the runtime's kmp_task_t layout.
/// For taskloop directives the record gains the extra lb/ub/st/liter/
/// reductions fields the taskloop runtime entry points read.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a priority (kmp_int32) and a destructor
  // thunk (routine entry pointer).
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
3520 
3521 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)3522 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3523                                      ArrayRef<PrivateDataTy> Privates) {
3524   ASTContext &C = CGM.getContext();
3525   // Build struct kmp_task_t_with_privates {
3526   //         kmp_task_t task_data;
3527   //         .kmp_privates_t. privates;
3528   //       };
3529   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3530   RD->startDefinition();
3531   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3532   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3533     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3534   RD->completeDefinition();
3535   return RD;
3536 }
3537 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The proxy's signature is fixed by the runtime:
  // kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference 'tt' and locate the kmp_task_t header (first field).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the task body can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the outlined task
  // function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates if the record has a privates field, otherwise null.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally forward lb, ub, st, liter and reductions loaded
    // from the kmp_task_t header.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the proxy to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3652 
/// Emit the task destructor thunk: a function with the routine-entry
/// signature (gtid, kmp_task_t_with_privates *) that runs the destructor of
/// every private copy stored in the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates record (second field of
  // kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every field whose type needs destruction;
  // these run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3701 
3702 /// Emit a privates mapping function for correct handling of private and
3703 /// firstprivate variables.
3704 /// \code
3705 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3706 /// **noalias priv1,...,  <tyn> **noalias privn) {
3707 ///   *priv1 = &.privates.priv1;
3708 ///   ...;
3709 ///   *privn = &.privates.privn;
3710 /// }
3711 /// \endcode
3712 static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule & CGM,SourceLocation Loc,ArrayRef<const Expr * > PrivateVars,ArrayRef<const Expr * > FirstprivateVars,ArrayRef<const Expr * > LastprivateVars,QualType PrivatesQTy,ArrayRef<PrivateDataTy> Privates)3713 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3714                                ArrayRef<const Expr *> PrivateVars,
3715                                ArrayRef<const Expr *> FirstprivateVars,
3716                                ArrayRef<const Expr *> LastprivateVars,
3717                                QualType PrivatesQTy,
3718                                ArrayRef<PrivateDataTy> Privates) {
3719   ASTContext &C = CGM.getContext();
3720   FunctionArgList Args;
3721   ImplicitParamDecl TaskPrivatesArg(
3722       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3723       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3724       ImplicitParamDecl::Other);
3725   Args.push_back(&TaskPrivatesArg);
3726   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3727   unsigned Counter = 1;
3728   for (const Expr *E : PrivateVars) {
3729     Args.push_back(ImplicitParamDecl::Create(
3730         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3731         C.getPointerType(C.getPointerType(E->getType()))
3732             .withConst()
3733             .withRestrict(),
3734         ImplicitParamDecl::Other));
3735     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3736     PrivateVarsPos[VD] = Counter;
3737     ++Counter;
3738   }
3739   for (const Expr *E : FirstprivateVars) {
3740     Args.push_back(ImplicitParamDecl::Create(
3741         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3742         C.getPointerType(C.getPointerType(E->getType()))
3743             .withConst()
3744             .withRestrict(),
3745         ImplicitParamDecl::Other));
3746     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3747     PrivateVarsPos[VD] = Counter;
3748     ++Counter;
3749   }
3750   for (const Expr *E : LastprivateVars) {
3751     Args.push_back(ImplicitParamDecl::Create(
3752         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3753         C.getPointerType(C.getPointerType(E->getType()))
3754             .withConst()
3755             .withRestrict(),
3756         ImplicitParamDecl::Other));
3757     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3758     PrivateVarsPos[VD] = Counter;
3759     ++Counter;
3760   }
3761   const auto &TaskPrivatesMapFnInfo =
3762       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3763   llvm::FunctionType *TaskPrivatesMapTy =
3764       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3765   std::string Name =
3766       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3767   auto *TaskPrivatesMap = llvm::Function::Create(
3768       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3769       &CGM.getModule());
3770   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3771                                     TaskPrivatesMapFnInfo);
3772   if (CGM.getLangOpts().Optimize) {
3773     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3774     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3775     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3776   }
3777   CodeGenFunction CGF(CGM);
3778   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3779                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3780 
3781   // *privi = &.privates.privi;
3782   LValue Base = CGF.EmitLoadOfPointerLValue(
3783       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3784       TaskPrivatesArg.getType()->castAs<PointerType>());
3785   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3786   Counter = 0;
3787   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3788     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3789     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3790     LValue RefLVal =
3791         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3792     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3793         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3794     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3795     ++Counter;
3796   }
3797   CGF.FinishFunction();
3798   return TaskPrivatesMap;
3799 }
3800 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer,
/// if any, for each private copy.
///
/// \param KmpTaskSharedsPtr Address of the task's shareds block; read when a
///        firstprivate value has to be copied out of it.
/// \param TDBase LValue of the kmp_task_t_with_privates object being set up.
/// \param ForDup true when called from the task duplication function
///        (taskloop); in that mode only non-trivial CXXConstructExpr
///        initializers are re-emitted, and firstprivate sources are loaded
///        from \p KmpTaskSharedsPtr rather than from the captured variables.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds block as SharedsTy so its fields can be read below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Iterate the privates record's fields in parallel with Privates; both were
  // built in the same order.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial C++ constructions are replayed.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the original/shared value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the shareds block, re-aligned to the
          // original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or inside a block: emit through the
          // original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the init helper variable to the
          // shared value, then run the initializer expression under the
          // captured-statement info.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: just run its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3917 
3918 /// Check if duplication function is required for taskloops.
checkInitIsRequired(CodeGenFunction & CGF,ArrayRef<PrivateDataTy> Privates)3919 static bool checkInitIsRequired(CodeGenFunction &CGF,
3920                                 ArrayRef<PrivateDataTy> Privates) {
3921   bool InitRequired = false;
3922   for (const PrivateDataTy &Pair : Privates) {
3923     const VarDecl *VD = Pair.second.PrivateCopy;
3924     const Expr *Init = VD->getAnyInitializer();
3925     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3926                                     !CGF.isTrivialInitializer(Init));
3927     if (InitRequired)
3928       break;
3929   }
3930   return InitRequired;
3931 }
3932 
3933 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the parameter list: (task_dst, task_src, lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  // Create an internal-linkage helper function in the current module.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Load the destination task descriptor: *task_dst.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds block:
    // load task_src->shareds.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Replay the (non-trivial) private initializers into the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4012 
4013 /// Checks if destructor function is required to be generated.
4014 /// \return true if cleanups are required, false otherwise.
4015 static bool
checkDestructorsRequired(const RecordDecl * KmpTaskTWithPrivatesQTyRD)4016 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4017   bool NeedsCleanup = false;
4018   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4019   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4020   for (const FieldDecl *FD : PrivateRD->fields()) {
4021     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4022     if (NeedsCleanup)
4023       break;
4024   }
4025   return NeedsCleanup;
4026 }
4027 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator and counter variables
/// of an OMPIteratorExpr and emits the opening half of one counting loop per
/// iterator (counter init, "iter.cont" condition check, branch into the body,
/// iterator update). The destructor emits the matching closing halves in
/// reverse order (counter increment, back-branch to "iter.cont", "iter.exit"
/// block), so code generated while the scope is alive runs once per iteration
/// point. A null expression makes the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue (condition re-check) and exit destinations, filled
  // by the constructor and consumed by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate every iterator's upper bound and create private
    // storage for the iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Second pass: emit the opening half of each nested loop.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed/unsigned compare based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4106 
4107 static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction & CGF,const Expr * E)4108 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4109   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4110   llvm::Value *Addr;
4111   if (OASE) {
4112     const Expr *Base = OASE->getBase();
4113     Addr = CGF.EmitScalarExpr(Base);
4114   } else {
4115     Addr = CGF.EmitLValue(E).getPointer(CGF);
4116   }
4117   llvm::Value *SizeVal;
4118   QualType Ty = E->getType();
4119   if (OASE) {
4120     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4121     for (const Expr *SE : OASE->getDimensions()) {
4122       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4123       Sz = CGF.EmitScalarConversion(
4124           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4125       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4126     }
4127   } else if (const auto *ASE =
4128                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4129     LValue UpAddrLVal =
4130         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4131     llvm::Value *UpAddr =
4132         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4133     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4134     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4135     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4136   } else {
4137     SizeVal = CGF.getTypeSize(Ty);
4138   }
4139   return std::make_pair(Addr, SizeVal);
4140 }
4141 
4142 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getKmpAffinityType(ASTContext & C,QualType & KmpTaskAffinityInfoTy)4143 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4144   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4145   if (KmpTaskAffinityInfoTy.isNull()) {
4146     RecordDecl *KmpAffinityInfoRD =
4147         C.buildImplicitRecord("kmp_task_affinity_info_t");
4148     KmpAffinityInfoRD->startDefinition();
4149     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4150     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4151     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4152     KmpAffinityInfoRD->completeDefinition();
4153     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4154   }
4155 }
4156 
4157 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)4158 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4159                               const OMPExecutableDirective &D,
4160                               llvm::Function *TaskFunction, QualType SharedsTy,
4161                               Address Shareds, const OMPTaskDataTy &Data) {
4162   ASTContext &C = CGM.getContext();
4163   llvm::SmallVector<PrivateDataTy, 4> Privates;
4164   // Aggregate privates and sort them by the alignment.
4165   const auto *I = Data.PrivateCopies.begin();
4166   for (const Expr *E : Data.PrivateVars) {
4167     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4168     Privates.emplace_back(
4169         C.getDeclAlign(VD),
4170         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4171                          /*PrivateElemInit=*/nullptr));
4172     ++I;
4173   }
4174   I = Data.FirstprivateCopies.begin();
4175   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4176   for (const Expr *E : Data.FirstprivateVars) {
4177     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4178     Privates.emplace_back(
4179         C.getDeclAlign(VD),
4180         PrivateHelpersTy(
4181             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4182             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4183     ++I;
4184     ++IElemInitRef;
4185   }
4186   I = Data.LastprivateCopies.begin();
4187   for (const Expr *E : Data.LastprivateVars) {
4188     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4189     Privates.emplace_back(
4190         C.getDeclAlign(VD),
4191         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4192                          /*PrivateElemInit=*/nullptr));
4193     ++I;
4194   }
4195   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4196     return L.first > R.first;
4197   });
4198   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4199   // Build type kmp_routine_entry_t (if not built yet).
4200   emitKmpRoutineEntryT(KmpInt32Ty);
4201   // Build type kmp_task_t (if not built yet).
4202   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4203     if (SavedKmpTaskloopTQTy.isNull()) {
4204       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4205           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4206     }
4207     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4208   } else {
4209     assert((D.getDirectiveKind() == OMPD_task ||
4210             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4211             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4212            "Expected taskloop, task or target directive");
4213     if (SavedKmpTaskTQTy.isNull()) {
4214       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4215           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4216     }
4217     KmpTaskTQTy = SavedKmpTaskTQTy;
4218   }
4219   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4220   // Build particular struct kmp_task_t for the given task.
4221   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4222       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4223   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4224   QualType KmpTaskTWithPrivatesPtrQTy =
4225       C.getPointerType(KmpTaskTWithPrivatesQTy);
4226   unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
4227   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4228   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4229       KmpTaskTWithPrivatesTy->getPointerTo(DefaultAS);
4230   llvm::Value *KmpTaskTWithPrivatesTySize =
4231       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4232   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4233 
4234   // Emit initial values for private copies (if any).
4235   llvm::Value *TaskPrivatesMap = nullptr;
4236   llvm::Type *TaskPrivatesMapTy =
4237       std::next(TaskFunction->arg_begin(), 3)->getType();
4238   if (!Privates.empty()) {
4239     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4240     TaskPrivatesMap = emitTaskPrivateMappingFunction(
4241         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4242         FI->getType(), Privates);
4243     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4244         TaskPrivatesMap, TaskPrivatesMapTy);
4245   } else {
4246     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4247         cast<llvm::PointerType>(TaskPrivatesMapTy));
4248   }
4249   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4250   // kmp_task_t *tt);
4251   llvm::Function *TaskEntry = emitProxyTaskFunction(
4252       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4253       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4254       TaskPrivatesMap);
4255 
4256   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4257   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4258   // kmp_routine_entry_t *task_entry);
4259   // Task flags. Format is taken from
4260   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
4261   // description of kmp_tasking_flags struct.
4262   enum {
4263     TiedFlag = 0x1,
4264     FinalFlag = 0x2,
4265     DestructorsFlag = 0x8,
4266     PriorityFlag = 0x20,
4267     DetachableFlag = 0x40,
4268   };
4269   unsigned Flags = Data.Tied ? TiedFlag : 0;
4270   bool NeedsCleanup = false;
4271   if (!Privates.empty()) {
4272     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4273     if (NeedsCleanup)
4274       Flags = Flags | DestructorsFlag;
4275   }
4276   if (Data.Priority.getInt())
4277     Flags = Flags | PriorityFlag;
4278   if (D.hasClausesOfKind<OMPDetachClause>())
4279     Flags = Flags | DetachableFlag;
4280   llvm::Value *TaskFlags =
4281       Data.Final.getPointer()
4282           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4283                                      CGF.Builder.getInt32(FinalFlag),
4284                                      CGF.Builder.getInt32(/*C=*/0))
4285           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4286   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4287   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4288   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4289       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4290       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4291           TaskEntry, KmpRoutineEntryPtrTy)};
4292   llvm::Value *NewTask;
4293   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4294     // Check if we have any device clause associated with the directive.
4295     const Expr *Device = nullptr;
4296     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4297       Device = C->getDevice();
4298     // Emit device ID if any otherwise use default value.
4299     llvm::Value *DeviceID;
4300     if (Device)
4301       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4302                                            CGF.Int64Ty, /*isSigned=*/true);
4303     else
4304       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4305     AllocArgs.push_back(DeviceID);
4306     NewTask = CGF.EmitRuntimeCall(
4307         OMPBuilder.getOrCreateRuntimeFunction(
4308             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4309         AllocArgs);
4310   } else {
4311     NewTask =
4312         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4313                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4314                             AllocArgs);
4315   }
4316   // Emit detach clause initialization.
4317   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4318   // task_descriptor);
4319   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4320     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4321     LValue EvtLVal = CGF.EmitLValue(Evt);
4322 
4323     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4324     // int gtid, kmp_task_t *task);
4325     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4326     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4327     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4328     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4329         OMPBuilder.getOrCreateRuntimeFunction(
4330             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4331         {Loc, Tid, NewTask});
4332     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4333                                       Evt->getExprLoc());
4334     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4335   }
4336   // Process affinity clauses.
4337   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4338     // Process list of affinity data.
4339     ASTContext &C = CGM.getContext();
4340     Address AffinitiesArray = Address::invalid();
4341     // Calculate number of elements to form the array of affinity data.
4342     llvm::Value *NumOfElements = nullptr;
4343     unsigned NumAffinities = 0;
4344     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4345       if (const Expr *Modifier = C->getModifier()) {
4346         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4347         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4348           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4349           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4350           NumOfElements =
4351               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4352         }
4353       } else {
4354         NumAffinities += C->varlist_size();
4355       }
4356     }
4357     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4358     // Fields ids in kmp_task_affinity_info record.
4359     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4360 
4361     QualType KmpTaskAffinityInfoArrayTy;
4362     if (NumOfElements) {
4363       NumOfElements = CGF.Builder.CreateNUWAdd(
4364           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4365       OpaqueValueExpr OVE(
4366           Loc,
4367           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4368           VK_RValue);
4369       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4370                                                     RValue::get(NumOfElements));
4371       KmpTaskAffinityInfoArrayTy =
4372           C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4373                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4374       // Properly emit variable-sized array.
4375       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4376                                            ImplicitParamDecl::Other);
4377       CGF.EmitVarDecl(*PD);
4378       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4379       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4380                                                 /*isSigned=*/false);
4381     } else {
4382       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4383           KmpTaskAffinityInfoTy,
4384           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4385           ArrayType::Normal, /*IndexTypeQuals=*/0);
4386       AffinitiesArray =
4387           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4388       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4389       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4390                                              /*isSigned=*/false);
4391     }
4392 
4393     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4394     // Fill array by elements without iterators.
4395     unsigned Pos = 0;
4396     bool HasIterator = false;
4397     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4398       if (C->getModifier()) {
4399         HasIterator = true;
4400         continue;
4401       }
4402       for (const Expr *E : C->varlists()) {
4403         llvm::Value *Addr;
4404         llvm::Value *Size;
4405         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4406         LValue Base =
4407             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4408                                KmpTaskAffinityInfoTy);
4409         // affs[i].base_addr = &<Affinities[i].second>;
4410         LValue BaseAddrLVal = CGF.EmitLValueForField(
4411             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4412         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4413                               BaseAddrLVal);
4414         // affs[i].len = sizeof(<Affinities[i].second>);
4415         LValue LenLVal = CGF.EmitLValueForField(
4416             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4417         CGF.EmitStoreOfScalar(Size, LenLVal);
4418         ++Pos;
4419       }
4420     }
4421     LValue PosLVal;
4422     if (HasIterator) {
4423       PosLVal = CGF.MakeAddrLValue(
4424           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4425           C.getSizeType());
4426       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4427     }
4428     // Process elements with iterators.
4429     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4430       const Expr *Modifier = C->getModifier();
4431       if (!Modifier)
4432         continue;
4433       OMPIteratorGeneratorScope IteratorScope(
4434           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4435       for (const Expr *E : C->varlists()) {
4436         llvm::Value *Addr;
4437         llvm::Value *Size;
4438         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4439         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4440         LValue Base = CGF.MakeAddrLValue(
4441             Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4442                     AffinitiesArray.getAlignment()),
4443             KmpTaskAffinityInfoTy);
4444         // affs[i].base_addr = &<Affinities[i].second>;
4445         LValue BaseAddrLVal = CGF.EmitLValueForField(
4446             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4447         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4448                               BaseAddrLVal);
4449         // affs[i].len = sizeof(<Affinities[i].second>);
4450         LValue LenLVal = CGF.EmitLValueForField(
4451             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4452         CGF.EmitStoreOfScalar(Size, LenLVal);
4453         Idx = CGF.Builder.CreateNUWAdd(
4454             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4455         CGF.EmitStoreOfScalar(Idx, PosLVal);
4456       }
4457     }
4458     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4459     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4460     // naffins, kmp_task_affinity_info_t *affin_list);
4461     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4462     llvm::Value *GTid = getThreadID(CGF, Loc);
4463     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4464         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4465     // FIXME: Emit the function and ignore its result for now unless the
4466     // runtime function is properly implemented.
4467     (void)CGF.EmitRuntimeCall(
4468         OMPBuilder.getOrCreateRuntimeFunction(
4469             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4470         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4471   }
4472   llvm::Value *NewTaskNewTaskTTy =
4473       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4474           NewTask, KmpTaskTWithPrivatesPtrTy);
4475   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4476                                                KmpTaskTWithPrivatesQTy);
4477   LValue TDBase =
4478       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4479   // Fill the data in the resulting kmp_task_t record.
4480   // Copy shareds if there are any.
4481   Address KmpTaskSharedsPtr = Address::invalid();
4482   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4483     KmpTaskSharedsPtr =
4484         Address(CGF.EmitLoadOfScalar(
4485                     CGF.EmitLValueForField(
4486                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4487                                            KmpTaskTShareds)),
4488                     Loc),
4489                 CGM.getNaturalTypeAlignment(SharedsTy));
4490     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4491     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4492     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4493   }
4494   // Emit initial values for private copies (if any).
4495   TaskResultTy Result;
4496   if (!Privates.empty()) {
4497     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4498                      SharedsTy, SharedsPtrTy, Data, Privates,
4499                      /*ForDup=*/false);
4500     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4501         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4502       Result.TaskDupFn = emitTaskDupFunction(
4503           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4504           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4505           /*WithLastIter=*/!Data.LastprivateVars.empty());
4506     }
4507   }
4508   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4509   enum { Priority = 0, Destructors = 1 };
4510   // Provide pointer to function with destructors for privates.
4511   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4512   const RecordDecl *KmpCmplrdataUD =
4513       (*FI)->getType()->getAsUnionType()->getDecl();
4514   if (NeedsCleanup) {
4515     llvm::Value *DestructorFn = emitDestructorsFunction(
4516         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4517         KmpTaskTWithPrivatesQTy);
4518     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4519     LValue DestructorsLV = CGF.EmitLValueForField(
4520         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4521     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4522                               DestructorFn, KmpRoutineEntryPtrTy),
4523                           DestructorsLV);
4524   }
4525   // Set priority.
4526   if (Data.Priority.getInt()) {
4527     LValue Data2LV = CGF.EmitLValueForField(
4528         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4529     LValue PriorityLV = CGF.EmitLValueForField(
4530         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4531     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4532   }
4533   Result.NewTask = NewTask;
4534   Result.TaskEntry = TaskEntry;
4535   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4536   Result.TDBase = TDBase;
4537   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4538   return Result;
4539 }
4540 
namespace {
/// Dependence kind for RTL.
/// Bit-flag values written into the kmp_depend_info 'flags' field; they must
/// mirror the libomp runtime's encoding (kmp.h) — verify against the runtime
/// before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Order must match the fields added in getDependTypes():
/// base_addr (intptr_t), len (size_t), flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4551 
4552 /// Translates internal dependency kind into the runtime kind.
translateDependencyKind(OpenMPDependClauseKind K)4553 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4554   RTLDependenceKindTy DepKind;
4555   switch (K) {
4556   case OMPC_DEPEND_in:
4557     DepKind = DepIn;
4558     break;
4559   // Out and InOut dependencies must use the same code.
4560   case OMPC_DEPEND_out:
4561   case OMPC_DEPEND_inout:
4562     DepKind = DepInOut;
4563     break;
4564   case OMPC_DEPEND_mutexinoutset:
4565     DepKind = DepMutexInOutSet;
4566     break;
4567   case OMPC_DEPEND_source:
4568   case OMPC_DEPEND_sink:
4569   case OMPC_DEPEND_depobj:
4570   case OMPC_DEPEND_unknown:
4571     llvm_unreachable("Unknown task dependence type");
4572   }
4573   return DepKind;
4574 }
4575 
4576 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getDependTypes(ASTContext & C,QualType & KmpDependInfoTy,QualType & FlagsTy)4577 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4578                            QualType &FlagsTy) {
4579   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4580   if (KmpDependInfoTy.isNull()) {
4581     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4582     KmpDependInfoRD->startDefinition();
4583     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4584     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4585     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4586     KmpDependInfoRD->completeDefinition();
4587     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4588   }
4589 }
4590 
/// Returns the number of dependency records stored in a depobj and an lvalue
/// for the first kmp_depend_info element of its data.
///
/// A depobj variable holds a pointer to the second element of a dynamically
/// allocated kmp_depend_info array; the record at index -1 stores the number
/// of dependencies in its base_addr field (see emitDepobjDependClause).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Make sure KmpDependInfoTy is built.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the pointer stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // View it as a pointer to kmp_depend_info.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one record to reach the element holding the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4619 
/// Fills \p DependenciesArray with kmp_depend_info records for every
/// dependency expression in \p Data, starting at position \p Pos.
///
/// \param Pos Either a compile-time index (unsigned *) for clauses without an
///        iterator modifier, or a runtime counter lvalue (LValue *) when the
///        number of previously emitted records is only known at run time. In
///        both cases the position is advanced past the emitted records.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Emit iterator loops if the clause has an iterator modifier; the stores
  // below are then generated inside the innermost loop body.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time position: address the element directly.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the counter and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the constant index, or emit a runtime
    // increment of the counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4678 
/// Emits, for every depobj dependency in \p Data, a runtime value holding the
/// total number of dependency records stored in that depobj.
///
/// The counts are accumulated into zero-initialized temporaries inside the
/// iterator scope (if the clause has an iterator modifier) and loaded back
/// only after the scope is closed, so the returned values are usable outside
/// the iterator-generated loops.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Emit iterator loops, if any; the body below is generated inside them.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the pointer stored in the depobj variable and view it as a
      // pointer to kmp_depend_info.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the record just before the data (see
      // emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary so iterator loops sum
      // the per-iteration counts.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated totals now that the iterator scope is closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4736 
/// Copies the dependency records stored in the depobj dependencies of \p Data
/// into \p DependenciesArray at the runtime position tracked by \p PosLVal.
///
/// The number of records per depobj is only known at run time (it is read
/// from the record preceding the depobj data), so the records are transferred
/// with memcpy and the position counter advanced by that count.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit iterator loops, if any; the copy below is generated inside them.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the pointer stored in the depobj variable and view it as a
      // pointer to kmp_depend_info.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: byte size = NumDeps * sizeof(record).
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // Pos += NumDeps (element count, not byte size).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4797 
emitDependClause(CodeGenFunction & CGF,ArrayRef<OMPTaskDataTy::DependData> Dependencies,SourceLocation Loc)4798 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4799     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4800     SourceLocation Loc) {
4801   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4802         return D.DepExprs.empty();
4803       }))
4804     return std::make_pair(nullptr, Address::invalid());
4805   // Process list of dependencies.
4806   ASTContext &C = CGM.getContext();
4807   Address DependenciesArray = Address::invalid();
4808   llvm::Value *NumOfElements = nullptr;
4809   unsigned NumDependencies = std::accumulate(
4810       Dependencies.begin(), Dependencies.end(), 0,
4811       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4812         return D.DepKind == OMPC_DEPEND_depobj
4813                    ? V
4814                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4815       });
4816   QualType FlagsTy;
4817   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4818   bool HasDepobjDeps = false;
4819   bool HasRegularWithIterators = false;
4820   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4821   llvm::Value *NumOfRegularWithIterators =
4822       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4823   // Calculate number of depobj dependecies and regular deps with the iterators.
4824   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4825     if (D.DepKind == OMPC_DEPEND_depobj) {
4826       SmallVector<llvm::Value *, 4> Sizes =
4827           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4828       for (llvm::Value *Size : Sizes) {
4829         NumOfDepobjElements =
4830             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4831       }
4832       HasDepobjDeps = true;
4833       continue;
4834     }
4835     // Include number of iterations, if any.
4836     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4837       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4838         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4839         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4840         NumOfRegularWithIterators =
4841             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4842       }
4843       HasRegularWithIterators = true;
4844       continue;
4845     }
4846   }
4847 
4848   QualType KmpDependInfoArrayTy;
4849   if (HasDepobjDeps || HasRegularWithIterators) {
4850     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4851                                            /*isSigned=*/false);
4852     if (HasDepobjDeps) {
4853       NumOfElements =
4854           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4855     }
4856     if (HasRegularWithIterators) {
4857       NumOfElements =
4858           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4859     }
4860     OpaqueValueExpr OVE(Loc,
4861                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4862                         VK_RValue);
4863     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4864                                                   RValue::get(NumOfElements));
4865     KmpDependInfoArrayTy =
4866         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4867                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4868     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4869     // Properly emit variable-sized array.
4870     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4871                                          ImplicitParamDecl::Other);
4872     CGF.EmitVarDecl(*PD);
4873     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4874     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4875                                               /*isSigned=*/false);
4876   } else {
4877     KmpDependInfoArrayTy = C.getConstantArrayType(
4878         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4879         ArrayType::Normal, /*IndexTypeQuals=*/0);
4880     DependenciesArray =
4881         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4882     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4883     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4884                                            /*isSigned=*/false);
4885   }
4886   unsigned Pos = 0;
4887   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4888     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4889         Dependencies[I].IteratorExpr)
4890       continue;
4891     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4892                    DependenciesArray);
4893   }
4894   // Copy regular dependecies with iterators.
4895   LValue PosLVal = CGF.MakeAddrLValue(
4896       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4897   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4898   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4899     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4900         !Dependencies[I].IteratorExpr)
4901       continue;
4902     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4903                    DependenciesArray);
4904   }
4905   // Copy final depobj arrays without iterators.
4906   if (HasDepobjDeps) {
4907     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4908       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4909         continue;
4910       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4911                          DependenciesArray);
4912     }
4913   }
4914   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4915       DependenciesArray, CGF.VoidPtrTy);
4916   return std::make_pair(NumOfElements, DependenciesArray);
4917 }
4918 
/// Emits a dynamically allocated dependency array for a depobj construct and
/// returns the (void*-cast) address of its first data element.
///
/// One extra kmp_depend_info record is allocated in front of the dependency
/// data; its base_addr field stores the number of dependencies so that later
/// depobj update/destroy constructs can recover the size (see
/// getDepobjElements).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime-sized: element count = product of iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Byte size = (count + 1 leading size record) * aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time-sized: NumDependencies + 1 records.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  // Positions start at 1 to skip the leading size record.
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the second element (first real dependency record).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5001 
/// Emit code for the 'destroy' clause of '#pragma omp depobj': release the
/// runtime-allocated dependency array referenced by the depobj variable via
/// __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Make sure KmpDependInfoTy (and its flags field type) has been built.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // The depobj variable stores a pointer to the dependency array; load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one kmp_depend_info element: the stored pointer refers to the
  // first real dependency record, while the allocation begins one record
  // earlier (that extra leading slot carries bookkeeping such as the element
  // count written when the depobj array was created).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator (null allocator argument).
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5028 
/// Emit code for the 'update' clause of '#pragma omp depobj': walk every
/// kmp_depend_info record stored in the depobj's dependency array and
/// overwrite its 'flags' field with the runtime encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Recover the dependency array base and its element count from the depobj.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  // 'End' is the one-past-the-last element used as the loop sentinel.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body block is entered unconditionally, so this assumes
  // the depobj holds at least one dependency record — confirm with callers.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: starts at Begin, advanced by one
  // element on each back-edge from the loop body.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop-exit test: we are done when the advanced pointer reaches End.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5074 
/// Emit code for '#pragma omp task': initialize the task object via
/// emitTaskInit(), then either enqueue it with the runtime
/// (__kmpc_omp_task / __kmpc_omp_task_with_deps) or — when the 'if' clause
/// evaluates to false — execute it immediately and undeferred, bracketed by
/// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and fill in the kmp_task_t object (shareds, privates, etc.).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0); // ndeps_noalias
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); // noalias list
  }
  // Deferred path: enqueue the task with the runtime. The part_id field is
  // zeroed first for untied tasks.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); // ndeps_noalias
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Undeferred path (if-clause false): wait for dependences, then run the
  // task body inline between begin_if0/complete_if0 runtime calls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause, branch between the two paths at runtime; otherwise
  // always take the deferred (enqueue) path.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5192 
/// Emit code for '#pragma omp taskloop': initialize the task object, store
/// the loop lower bound, upper bound, stride and reduction data into the
/// kmp_task_t record, then call __kmpc_taskloop with the schedule
/// (grainsize/num_tasks) arguments.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause becomes a runtime int argument rather than a branch;
  // absent a clause it is constant 1.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the loop lower bound into the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store the loop stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Runtime encoding of the 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Schedule: the PointerIntPair's int bit distinguishes num_tasks from
      // grainsize; a null pointer means no schedule clause.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup callback, or null when no duplication function is needed.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5278 
5279 /// Emit reduction operation for each element of array (required for
5280 /// array sections) LHS op = RHS.
5281 /// \param Type Type of array.
5282 /// \param LHSVar Variable on the left side of the reduction operation
5283 /// (references element of array in original variable).
5284 /// \param RHSVar Variable on the right side of the reduction operation
5285 /// (references element of array in original variable).
5286 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5287 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop: test for the empty array
  // before entering the body (unlike emitUpdateClause's loop).
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Two PHIs walk the source (RHS) and destination (LHS) arrays in lockstep.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current element so the
  // reduction-op generator operates on a single element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edges must come from the builder's current block because RedOpGen
  // may have emitted additional basic blocks inside the body.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5358 
5359 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5360 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5361 /// UDR combiner function.
emitReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp)5362 static void emitReductionCombiner(CodeGenFunction &CGF,
5363                                   const Expr *ReductionOp) {
5364   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5365     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5366       if (const auto *DRE =
5367               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5368         if (const auto *DRD =
5369                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5370           std::pair<llvm::Function *, llvm::Function *> Reduction =
5371               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5372           RValue Func = RValue::get(Reduction.first);
5373           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5374           CGF.EmitIgnoredExpr(ReductionOp);
5375           return;
5376         }
5377   CGF.EmitIgnoredExpr(ReductionOp);
5378 }
5379 
/// Build the outlined 'reduce_func(void *lhs[n], void *rhs[n])' used by
/// __kmpc_reduce{_nowait}: for each reduction item it applies the combiner
/// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]).
/// For variably-modified types the element after the data pointer in each
/// array holds the VLA size (as an int disguised as void*).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map each LHS/RHS variable onto its slot in the packed void* arrays.
  // Idx tracks the array slot and may run ahead of I when VLA-size slots
  // are interleaved with the data pointers.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type: the next slot carries the VLA
      // element count encoded as a pointer.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for every reduction item; array-typed items reduce
  // element by element.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5471 
emitSingleReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp,const Expr * PrivateRef,const DeclRefExpr * LHS,const DeclRefExpr * RHS)5472 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5473                                                   const Expr *ReductionOp,
5474                                                   const Expr *PrivateRef,
5475                                                   const DeclRefExpr *LHS,
5476                                                   const DeclRefExpr *RHS) {
5477   if (PrivateRef->getType()->isArrayType()) {
5478     // Emit reduction for array section.
5479     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5480     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5481     EmitOMPAggregateReduction(
5482         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5483         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5484           emitReductionCombiner(CGF, ReductionOp);
5485         });
5486   } else {
5487     // Emit reduction for array subscript or single variable.
5488     emitReductionCombiner(CGF, ReductionOp);
5489   }
5490 }
5491 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,ReductionOptionsTy Options)5492 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5493                                     ArrayRef<const Expr *> Privates,
5494                                     ArrayRef<const Expr *> LHSExprs,
5495                                     ArrayRef<const Expr *> RHSExprs,
5496                                     ArrayRef<const Expr *> ReductionOps,
5497                                     ReductionOptionsTy Options) {
5498   if (!CGF.HaveInsertPoint())
5499     return;
5500 
5501   bool WithNowait = Options.WithNowait;
5502   bool SimpleReduction = Options.SimpleReduction;
5503 
5504   // Next code should be emitted for reduction:
5505   //
5506   // static kmp_critical_name lock = { 0 };
5507   //
5508   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5509   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5510   //  ...
5511   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5512   //  *(Type<n>-1*)rhs[<n>-1]);
5513   // }
5514   //
5515   // ...
5516   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5517   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5518   // RedList, reduce_func, &<lock>)) {
5519   // case 1:
5520   //  ...
5521   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5522   //  ...
5523   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5524   // break;
5525   // case 2:
5526   //  ...
5527   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5528   //  ...
5529   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5530   // break;
5531   // default:;
5532   // }
5533   //
5534   // if SimpleReduction is true, only the next code is generated:
5535   //  ...
5536   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5537   //  ...
5538 
5539   ASTContext &C = CGM.getContext();
5540 
5541   if (SimpleReduction) {
5542     CodeGenFunction::RunCleanupsScope Scope(CGF);
5543     auto IPriv = Privates.begin();
5544     auto ILHS = LHSExprs.begin();
5545     auto IRHS = RHSExprs.begin();
5546     for (const Expr *E : ReductionOps) {
5547       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5548                                   cast<DeclRefExpr>(*IRHS));
5549       ++IPriv;
5550       ++ILHS;
5551       ++IRHS;
5552     }
5553     return;
5554   }
5555 
5556   // 1. Build a list of reduction variables.
5557   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5558   auto Size = RHSExprs.size();
5559   for (const Expr *E : Privates) {
5560     if (E->getType()->isVariablyModifiedType())
5561       // Reserve place for array size.
5562       ++Size;
5563   }
5564   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5565   QualType ReductionArrayTy =
5566       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5567                              /*IndexTypeQuals=*/0);
5568   Address ReductionList =
5569       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5570   auto IPriv = Privates.begin();
5571   unsigned Idx = 0;
5572   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5573     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5574     CGF.Builder.CreateStore(
5575         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5576             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5577         Elem);
5578     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5579       // Store array size.
5580       ++Idx;
5581       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5582       llvm::Value *Size = CGF.Builder.CreateIntCast(
5583           CGF.getVLASize(
5584                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5585               .NumElts,
5586           CGF.SizeTy, /*isSigned=*/false);
5587       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5588                               Elem);
5589     }
5590   }
5591 
5592   // 2. Emit reduce_func().
5593   unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
5594   llvm::Function *ReductionFn = emitReductionFunction(
5595       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(DefaultAS), Privates,
5596       LHSExprs, RHSExprs, ReductionOps);
5597 
5598   // 3. Create static kmp_critical_name lock = { 0 };
5599   std::string Name = getName({"reduction"});
5600   llvm::Value *Lock = getCriticalRegionLock(Name);
5601 
5602   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5603   // RedList, reduce_func, &<lock>);
5604   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5605   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5606   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5607   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5608       ReductionList.getPointer(), CGF.VoidPtrTy);
5609   llvm::Value *Args[] = {
5610       IdentTLoc,                             // ident_t *<loc>
5611       ThreadId,                              // i32 <gtid>
5612       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5613       ReductionArrayTySize,                  // size_type sizeof(RedList)
5614       RL,                                    // void *RedList
5615       ReductionFn, // void (*) (void *, void *) <reduce_func>
5616       Lock         // kmp_critical_name *&<lock>
5617   };
5618   llvm::Value *Res = CGF.EmitRuntimeCall(
5619       OMPBuilder.getOrCreateRuntimeFunction(
5620           CGM.getModule(),
5621           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5622       Args);
5623 
5624   // 5. Build switch(res)
5625   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5626   llvm::SwitchInst *SwInst =
5627       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5628 
5629   // 6. Build case 1:
5630   //  ...
5631   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5632   //  ...
5633   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5634   // break;
5635   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5636   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5637   CGF.EmitBlock(Case1BB);
5638 
5639   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5640   llvm::Value *EndArgs[] = {
5641       IdentTLoc, // ident_t *<loc>
5642       ThreadId,  // i32 <gtid>
5643       Lock       // kmp_critical_name *&<lock>
5644   };
5645   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5646                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5647     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5648     auto IPriv = Privates.begin();
5649     auto ILHS = LHSExprs.begin();
5650     auto IRHS = RHSExprs.begin();
5651     for (const Expr *E : ReductionOps) {
5652       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5653                                      cast<DeclRefExpr>(*IRHS));
5654       ++IPriv;
5655       ++ILHS;
5656       ++IRHS;
5657     }
5658   };
5659   RegionCodeGenTy RCG(CodeGen);
5660   CommonActionTy Action(
5661       nullptr, llvm::None,
5662       OMPBuilder.getOrCreateRuntimeFunction(
5663           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5664                                       : OMPRTL___kmpc_end_reduce),
5665       EndArgs);
5666   RCG.setAction(Action);
5667   RCG(CGF);
5668 
5669   CGF.EmitBranch(DefaultBB);
5670 
5671   // 7. Build case 2:
5672   //  ...
5673   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5674   //  ...
5675   // break;
5676   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5677   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5678   CGF.EmitBlock(Case2BB);
5679 
5680   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5681                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5682     auto ILHS = LHSExprs.begin();
5683     auto IRHS = RHSExprs.begin();
5684     auto IPriv = Privates.begin();
5685     for (const Expr *E : ReductionOps) {
5686       const Expr *XExpr = nullptr;
5687       const Expr *EExpr = nullptr;
5688       const Expr *UpExpr = nullptr;
5689       BinaryOperatorKind BO = BO_Comma;
5690       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5691         if (BO->getOpcode() == BO_Assign) {
5692           XExpr = BO->getLHS();
5693           UpExpr = BO->getRHS();
5694         }
5695       }
5696       // Try to emit update expression as a simple atomic.
5697       const Expr *RHSExpr = UpExpr;
5698       if (RHSExpr) {
5699         // Analyze RHS part of the whole expression.
5700         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5701                 RHSExpr->IgnoreParenImpCasts())) {
5702           // If this is a conditional operator, analyze its condition for
5703           // min/max reduction operator.
5704           RHSExpr = ACO->getCond();
5705         }
5706         if (const auto *BORHS =
5707                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5708           EExpr = BORHS->getRHS();
5709           BO = BORHS->getOpcode();
5710         }
5711       }
5712       if (XExpr) {
5713         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5714         auto &&AtomicRedGen = [BO, VD,
5715                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5716                                     const Expr *EExpr, const Expr *UpExpr) {
5717           LValue X = CGF.EmitLValue(XExpr);
5718           RValue E;
5719           if (EExpr)
5720             E = CGF.EmitAnyExpr(EExpr);
5721           CGF.EmitOMPAtomicSimpleUpdateExpr(
5722               X, E, BO, /*IsXLHSInRHSPart=*/true,
5723               llvm::AtomicOrdering::Monotonic, Loc,
5724               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5725                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5726                 PrivateScope.addPrivate(
5727                     VD, [&CGF, VD, XRValue, Loc]() {
5728                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5729                       CGF.emitOMPSimpleStore(
5730                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5731                           VD->getType().getNonReferenceType(), Loc);
5732                       return LHSTemp;
5733                     });
5734                 (void)PrivateScope.Privatize();
5735                 return CGF.EmitAnyExpr(UpExpr);
5736               });
5737         };
5738         if ((*IPriv)->getType()->isArrayType()) {
5739           // Emit atomic reduction for array section.
5740           const auto *RHSVar =
5741               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5742           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5743                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5744         } else {
5745           // Emit atomic reduction for array subscript or single variable.
5746           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5747         }
5748       } else {
5749         // Emit as a critical region.
5750         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5751                                            const Expr *, const Expr *) {
5752           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5753           std::string Name = RT.getName({"atomic_reduction"});
5754           RT.emitCriticalRegion(
5755               CGF, Name,
5756               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5757                 Action.Enter(CGF);
5758                 emitReductionCombiner(CGF, E);
5759               },
5760               Loc);
5761         };
5762         if ((*IPriv)->getType()->isArrayType()) {
5763           const auto *LHSVar =
5764               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5765           const auto *RHSVar =
5766               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5767           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5768                                     CritRedGen);
5769         } else {
5770           CritRedGen(CGF, nullptr, nullptr, nullptr);
5771         }
5772       }
5773       ++ILHS;
5774       ++IRHS;
5775       ++IPriv;
5776     }
5777   };
5778   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5779   if (!WithNowait) {
5780     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5781     llvm::Value *EndArgs[] = {
5782         IdentTLoc, // ident_t *<loc>
5783         ThreadId,  // i32 <gtid>
5784         Lock       // kmp_critical_name *&<lock>
5785     };
5786     CommonActionTy Action(nullptr, llvm::None,
5787                           OMPBuilder.getOrCreateRuntimeFunction(
5788                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5789                           EndArgs);
5790     AtomicRCG.setAction(Action);
5791     AtomicRCG(CGF);
5792   } else {
5793     AtomicRCG(CGF);
5794   }
5795 
5796   CGF.EmitBranch(DefaultBB);
5797   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5798 }
5799 
5800 /// Generates unique name for artificial threadprivate variables.
5801 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
generateUniqueName(CodeGenModule & CGM,StringRef Prefix,const Expr * Ref)5802 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5803                                       const Expr *Ref) {
5804   SmallString<256> Buffer;
5805   llvm::raw_svector_ostream Out(Buffer);
5806   const clang::DeclRefExpr *DE;
5807   const VarDecl *D = ::getBaseDecl(Ref, DE);
5808   if (!D)
5809     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5810   D = D->getCanonicalDecl();
5811   std::string Name = CGM.getOpenMPRuntime().getName(
5812       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5813   Out << Prefix << Name << "_"
5814       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5815   return std::string(Out.str());
5816 }
5817 
5818 /// Emits reduction initializer function:
5819 /// \code
5820 /// void @.red_init(void* %arg, void* %orig) {
5821 /// %0 = bitcast void* %arg to <type>*
5822 /// store <type> <init>, <type>* %0
5823 /// ret void
5824 /// }
5825 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both implicit parameters are 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Internal-linkage helper named "<sep>red_init<sep>" via the runtime's
  // name mangling scheme.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg: address of the private reduction item to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Original item not needed: pass a null void* lvalue instead.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5886 
5887 /// Emits reduction combiner function:
5888 /// \code
5889 /// void @.red_comb(void* %arg0, void* %arg1) {
5890 /// %lhs = bitcast void* %arg0 to <type>*
5891 /// %rhs = bitcast void* %arg1 to <type>*
5892 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5893 /// store <type> %2, <type>* %lhs
5894 /// ret void
5895 /// }
5896 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS are DeclRefExprs for the variables the combiner expression
  // ReductionOp refers to; they are remapped to the function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in/out item (combined result stored here), %arg1: incoming item.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5964 
5965 /// Emits reduction finalizer function:
5966 /// \code
5967 /// void @.red_fini(void* %arg) {
5968 /// %0 = bitcast void* %arg to <type>*
5969 /// <destroy>(<type>* %0)
5970 /// ret void
5971 /// }
5972 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is emitted at all when the item needs no cleanups; the
  // caller stores a null pointer in that case.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single parameter: void* %arg, the private item to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6013 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when no cleanups are needed).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (or 1 for delayed creation of VLAs/array sections).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6142 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  // is_ws is 1 for a worksharing reduction, 0 otherwise.
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6160 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant (Sizes.second != nullptr). The init/comb/fini helper
  // functions read the dynamic size back from this variable.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6177 
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)6178 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6179                                               SourceLocation Loc,
6180                                               llvm::Value *ReductionsPtr,
6181                                               LValue SharedLVal) {
6182   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6183   // *d);
6184   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6185                                                    CGM.IntTy,
6186                                                    /*isSigned=*/true),
6187                          ReductionsPtr,
6188                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6189                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6190   return Address(
6191       CGF.EmitRuntimeCall(
6192           OMPBuilder.getOrCreateRuntimeFunction(
6193               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6194           Args),
6195       SharedLVal.getAlignment());
6196 }
6197 
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc)6198 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6199                                        SourceLocation Loc) {
6200   if (!CGF.HaveInsertPoint())
6201     return;
6202 
6203   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6204     OMPBuilder.CreateTaskwait(CGF.Builder);
6205   } else {
6206     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6207     // global_tid);
6208     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6209     // Ignore return result until untied tasks are supported.
6210     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6211                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6212                         Args);
6213   }
6214 
6215   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6216     Region->emitUntiedSwitch(CGF);
6217 }
6218 
emitInlinedDirective(CodeGenFunction & CGF,OpenMPDirectiveKind InnerKind,const RegionCodeGenTy & CodeGen,bool HasCancel)6219 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6220                                            OpenMPDirectiveKind InnerKind,
6221                                            const RegionCodeGenTy &CodeGen,
6222                                            bool HasCancel) {
6223   if (!CGF.HaveInsertPoint())
6224     return;
6225   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6226   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6227 }
6228 
namespace {
/// Values passed as the i32 'cncl_kind' argument of __kmpc_cancel and
/// __kmpc_cancellationpoint (see emitCancelCall/emitCancellationPointCall).
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel parallel region
  CancelLoop = 2,      // cancel worksharing loop
  CancelSections = 3,  // cancel sections construct
  CancelTaskgroup = 4  // cancel taskgroup
};
} // anonymous namespace
6238 
getCancellationKind(OpenMPDirectiveKind CancelRegion)6239 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6240   RTCancelKind CancelKind = CancelNoreq;
6241   if (CancelRegion == OMPD_parallel)
6242     CancelKind = CancelParallel;
6243   else if (CancelRegion == OMPD_for)
6244     CancelKind = CancelLoop;
6245   else if (CancelRegion == OMPD_sections)
6246     CancelKind = CancelSections;
6247   else {
6248     assert(CancelRegion == OMPD_taskgroup);
6249     CancelKind = CancelTaskgroup;
6250   }
6251   return CancelKind;
6252 }
6253 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branch through any pending cleanups to the
      // region's cancellation destination.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6290 
/// Emit code for '#pragma omp cancel': call __kmpc_cancel() (optionally
/// guarded by the directive's 'if' clause) and, when cancellation is active,
/// branch out of the innermost cancellable construct through its cleanups.
///
/// \param IfCond Condition from the 'if' clause, or nullptr if absent.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  // Nothing to emit if the builder has no insertion point (dead code).
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted when the cancel actually fires (the 'then' branch of the
    // optional 'if' clause).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // Branch through cleanups so destructors still run on the cancel path.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause, only cancel when the condition evaluates to true;
      // the 'else' branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6333 
namespace {
/// Cleanup action for uses_allocators support.
///
/// On entry to the target region, initializes every allocator listed in the
/// uses_allocators clause (only those that have allocator traits); on exit,
/// destroys them again.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator-traits expr) pairs collected from the clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emit __kmpc_init_allocator calls for all listed allocators.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emit __kmpc_destroy_allocator calls for all listed allocators.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6361 
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)6362 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6363     const OMPExecutableDirective &D, StringRef ParentName,
6364     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6365     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6366   assert(!ParentName.empty() && "Invalid target region parent name!");
6367   HasEmittedTargetRegion = true;
6368   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6369   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6370     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6371       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6372       if (!D.AllocatorTraits)
6373         continue;
6374       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6375     }
6376   }
6377   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6378   CodeGen.setAction(UsesAllocatorAction);
6379   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6380                                    IsOffloadEntry, CodeGen);
6381 }
6382 
/// Initialize a user-defined allocator from a uses_allocators clause:
/// emit a call to __kmpc_init_allocator(tid, memspace, ntraits, traits) and
/// store the returned handle into the allocator variable.
///
/// \param Allocator Expression naming the allocator variable.
/// \param AllocatorTraits Expression for the constant array of traits.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = number of elements of the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // The runtime expects the traits as a void** pointer.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the local allocator variable itself, then assign the handle.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* handle to the declared allocator type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6417 
/// Destroy a user-defined allocator previously created by
/// emitUsesAllocatorsInit: load its handle and emit a call to
/// __kmpc_destroy_allocator(tid, allocator).
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // Convert the stored allocator value back to the void* the runtime expects.
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
6433 
/// Generate the outlined function for a target region, derive its unique
/// entry name and region ID, and (when required) register it as an
/// offloading entry.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement into the entry function.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the (bitcast) function address itself.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a dummy constant global serves as the unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6500 
6501 /// Checks if the expression is constant or does not have non-trivial function
6502 /// calls.
isTrivial(ASTContext & Ctx,const Expr * E)6503 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6504   // We can skip constant expressions.
6505   // We can skip expressions with trivial calls or simple expressions.
6506   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6507           !E->hasNonTrivialCall(Ctx)) &&
6508          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6509 }
6510 
/// Walk through compound statements, skipping trivial expressions, no-op
/// statements and trivially-ignorable declarations, and return the single
/// remaining significant child statement, or nullptr if there is more than
/// one (or none at all after a multi-child compound).
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending as long as the current candidate is itself a compound.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial expressions contribute nothing; skip them.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only if every declaration in it is.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or of trivial or
              // reference type with no initializer (or a trivial one).
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6555 
6556 /// Emit the number of teams for a target directive.  Inspect the num_teams
6557 /// clause associated with a teams construct combined or closely nested
6558 /// with the target directive.
6559 ///
6560 /// Emit a team of size one for directives such as 'target parallel' that
6561 /// have no associated teams construct.
6562 ///
6563 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look inside for a closely nested teams directive.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested teams: emit its num_teams clause value, or 0 (runtime
        // default) when the clause is absent.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      // Nested parallel/simd: exactly one team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive found: leave the decision to the caller.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if any) is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Directives with no teams construct execute with a single team.
    return Bld.getInt32(1);
  // None of the remaining directive kinds are target-based, so reaching any
  // of them here is a front-end invariant violation.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6688 
/// Compute the number of threads implied by the single directive nested in
/// the captured statement \p CS, honoring its 'if' and 'num_threads' clauses
/// and clamping to \p DefaultThreadLimitVal when that is non-null.
/// Returns \p DefaultThreadLimitVal (or 0 when it is null) when no nested
/// parallel directive determines the thread count.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to 'parallel' (unmodified or
        // explicitly 'parallel'-modified).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized region, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Runtime condition: emit any pre-init declarations, then
            // evaluate the condition for use in a select below.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the default thread limit when one is given:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd directive runs with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6780 
6781 /// Emit the number of threads for a target directive.  Inspect the
6782 /// thread_limit clause associated with a teams construct combined or closely
6783 /// nested with the target directive.
6784 ///
6785 /// Emit the num_threads clause for directives such as 'target parallel' that
6786 /// have no associated teams construct.
6787 ///
6788 /// Otherwise, return nullptr.
6789 static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D)6790 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6791                                  const OMPExecutableDirective &D) {
6792   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6793          "Clauses associated with the teams directive expected to be emitted "
6794          "only for the host!");
6795   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6796   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6797          "Expected target-based executable directive.");
6798   CGBuilderTy &Bld = CGF.Builder;
6799   llvm::Value *ThreadLimitVal = nullptr;
6800   llvm::Value *NumThreadsVal = nullptr;
6801   switch (DirectiveKind) {
6802   case OMPD_target: {
6803     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6804     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6805       return NumThreads;
6806     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6807         CGF.getContext(), CS->getCapturedStmt());
6808     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6809       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6810         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6811         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6812         const auto *ThreadLimitClause =
6813             Dir->getSingleClause<OMPThreadLimitClause>();
6814         CodeGenFunction::LexicalScope Scope(
6815             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6816         if (const auto *PreInit =
6817                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6818           for (const auto *I : PreInit->decls()) {
6819             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6820               CGF.EmitVarDecl(cast<VarDecl>(*I));
6821             } else {
6822               CodeGenFunction::AutoVarEmission Emission =
6823                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6824               CGF.EmitAutoVarCleanups(Emission);
6825             }
6826           }
6827         }
6828         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6829             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6830         ThreadLimitVal =
6831             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6832       }
6833       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6834           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6835         CS = Dir->getInnermostCapturedStmt();
6836         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6837             CGF.getContext(), CS->getCapturedStmt());
6838         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6839       }
6840       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6841           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6842         CS = Dir->getInnermostCapturedStmt();
6843         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6844           return NumThreads;
6845       }
6846       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6847         return Bld.getInt32(1);
6848     }
6849     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6850   }
6851   case OMPD_target_teams: {
6852     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6853       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6854       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6855       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6856           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6857       ThreadLimitVal =
6858           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6859     }
6860     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6861     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6862       return NumThreads;
6863     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6864         CGF.getContext(), CS->getCapturedStmt());
6865     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6866       if (Dir->getDirectiveKind() == OMPD_distribute) {
6867         CS = Dir->getInnermostCapturedStmt();
6868         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6869           return NumThreads;
6870       }
6871     }
6872     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6873   }
6874   case OMPD_target_teams_distribute:
6875     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6876       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6877       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6878       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6879           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6880       ThreadLimitVal =
6881           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6882     }
6883     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6884   case OMPD_target_parallel:
6885   case OMPD_target_parallel_for:
6886   case OMPD_target_parallel_for_simd:
6887   case OMPD_target_teams_distribute_parallel_for:
6888   case OMPD_target_teams_distribute_parallel_for_simd: {
6889     llvm::Value *CondVal = nullptr;
6890     // Handle if clause. If if clause present, the number of threads is
6891     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6892     if (D.hasClausesOfKind<OMPIfClause>()) {
6893       const OMPIfClause *IfClause = nullptr;
6894       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6895         if (C->getNameModifier() == OMPD_unknown ||
6896             C->getNameModifier() == OMPD_parallel) {
6897           IfClause = C;
6898           break;
6899         }
6900       }
6901       if (IfClause) {
6902         const Expr *Cond = IfClause->getCondition();
6903         bool Result;
6904         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6905           if (!Result)
6906             return Bld.getInt32(1);
6907         } else {
6908           CodeGenFunction::RunCleanupsScope Scope(CGF);
6909           CondVal = CGF.EvaluateExprAsBool(Cond);
6910         }
6911       }
6912     }
6913     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6914       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6915       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6916       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6917           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6918       ThreadLimitVal =
6919           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6920     }
6921     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6922       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6923       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6924       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6925           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6926       NumThreadsVal =
6927           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6928       ThreadLimitVal = ThreadLimitVal
6929                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6930                                                                 ThreadLimitVal),
6931                                               NumThreadsVal, ThreadLimitVal)
6932                            : NumThreadsVal;
6933     }
6934     if (!ThreadLimitVal)
6935       ThreadLimitVal = Bld.getInt32(0);
6936     if (CondVal)
6937       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6938     return ThreadLimitVal;
6939   }
6940   case OMPD_target_teams_distribute_simd:
6941   case OMPD_target_simd:
6942     return Bld.getInt32(1);
6943   case OMPD_parallel:
6944   case OMPD_for:
6945   case OMPD_parallel_for:
6946   case OMPD_parallel_master:
6947   case OMPD_parallel_sections:
6948   case OMPD_for_simd:
6949   case OMPD_parallel_for_simd:
6950   case OMPD_cancel:
6951   case OMPD_cancellation_point:
6952   case OMPD_ordered:
6953   case OMPD_threadprivate:
6954   case OMPD_allocate:
6955   case OMPD_task:
6956   case OMPD_simd:
6957   case OMPD_sections:
6958   case OMPD_section:
6959   case OMPD_single:
6960   case OMPD_master:
6961   case OMPD_critical:
6962   case OMPD_taskyield:
6963   case OMPD_barrier:
6964   case OMPD_taskwait:
6965   case OMPD_taskgroup:
6966   case OMPD_atomic:
6967   case OMPD_flush:
6968   case OMPD_depobj:
6969   case OMPD_scan:
6970   case OMPD_teams:
6971   case OMPD_target_data:
6972   case OMPD_target_exit_data:
6973   case OMPD_target_enter_data:
6974   case OMPD_distribute:
6975   case OMPD_distribute_simd:
6976   case OMPD_distribute_parallel_for:
6977   case OMPD_distribute_parallel_for_simd:
6978   case OMPD_teams_distribute:
6979   case OMPD_teams_distribute_simd:
6980   case OMPD_teams_distribute_parallel_for:
6981   case OMPD_teams_distribute_parallel_for_simd:
6982   case OMPD_target_update:
6983   case OMPD_declare_simd:
6984   case OMPD_declare_variant:
6985   case OMPD_begin_declare_variant:
6986   case OMPD_end_declare_variant:
6987   case OMPD_declare_target:
6988   case OMPD_end_declare_target:
6989   case OMPD_declare_reduction:
6990   case OMPD_declare_mapper:
6991   case OMPD_taskloop:
6992   case OMPD_taskloop_simd:
6993   case OMPD_master_taskloop:
6994   case OMPD_master_taskloop_simd:
6995   case OMPD_parallel_master_taskloop:
6996   case OMPD_parallel_master_taskloop_simd:
6997   case OMPD_requires:
6998   case OMPD_unknown:
6999     break;
7000   default:
7001     break;
7002   }
7003   llvm_unreachable("Unsupported directive kind.");
7004 }
7005 
7006 namespace {
7007 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7008 
7009 // Utility to handle information from clauses associated with a given
7010 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7011 // It provides a convenient interface to obtain the information and generate
7012 // code for that information.
7013 class MappableExprsHandler {
7014 public:
7015   /// Values for bit flags used to specify the mapping type for
7016   /// offloading.
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these bit values are presumably interpreted by the
  /// offloading runtime library — verify against the runtime before
  /// changing any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7055 
7056   /// Get the offset of the OMP_MAP_MEMBER_OF field.
getFlagMemberOffset()7057   static unsigned getFlagMemberOffset() {
7058     unsigned Offset = 0;
7059     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7060          Remain = Remain >> 1)
7061       Offset++;
7062     return Offset;
7063   }
7064 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereferencing an entry yields the stored base pointer.
    llvm::Value *operator*() const { return Ptr; }
    /// Declaration associated with this device pointer entry, or null.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7081 
7082   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7083   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7084   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7085 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest-indexed mapped field of the struct and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest-indexed mapped field of the struct and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself (base of the combined entry).
    Address Base = Address::invalid();
  };
7097 
7098 private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    /// Component list (base expression down to the mapped member) that this
    /// entry was built from.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map kind from the originating clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close) from the clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if the runtime must return the device pointer for this entry
    /// (use_device_ptr / use_device_addr handling).
    bool ReturnDevicePointer = false;
    /// True if the mapping was generated implicitly rather than written by
    /// the user.
    bool IsImplicit = false;
    /// True when this entry stems from a use_device_addr clause — presumably;
    /// confirm against the callers that set it.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };
7118 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression naming the deferred member.
    const Expr *IE = nullptr;
    /// Declaration the use_device_ptr/use_device_addr clause refers to.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7131 
7132   /// The target directive from where the mappable clauses were extracted. It
7133   /// is either a executable directive or a user-defined mapper directive.
7134   llvm::PointerUnion<const OMPExecutableDirective *,
7135                      const OMPDeclareMapperDecl *>
7136       CurDir;
7137 
7138   /// Function the directive is being generated for.
7139   CodeGenFunction &CGF;
7140 
7141   /// Set of all first private variables in the current directive.
7142   /// bool data is set to true if the variable is implicitly marked as
7143   /// firstprivate, false otherwise.
7144   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7145 
7146   /// Map between device pointer declarations and their expression components.
7147   /// The key value for declarations in 'this' is null.
7148   llvm::DenseMap<
7149       const ValueDecl *,
7150       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7151       DevPointersMap;
7152 
getExprTypeSize(const Expr * E) const7153   llvm::Value *getExprTypeSize(const Expr *E) const {
7154     QualType ExprTy = E->getType().getCanonicalType();
7155 
7156     // Calculate the size for array shaping expression.
7157     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7158       llvm::Value *Size =
7159           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7160       for (const Expr *SE : OAE->getDimensions()) {
7161         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7162         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7163                                       CGF.getContext().getSizeType(),
7164                                       SE->getExprLoc());
7165         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7166       }
7167       return Size;
7168     }
7169 
7170     // Reference types are ignored for mapping purposes.
7171     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7172       ExprTy = RefTy->getPointeeType().getCanonicalType();
7173 
7174     // Given that an array section is considered a built-in type, we need to
7175     // do the calculation based on the length of the section instead of relying
7176     // on CGF.getTypeSize(E->getType()).
7177     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7178       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7179                             OAE->getBase()->IgnoreParenImpCasts())
7180                             .getCanonicalType();
7181 
7182       // If there is no length associated with the expression and lower bound is
7183       // not specified too, that means we are using the whole length of the
7184       // base.
7185       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7186           !OAE->getLowerBound())
7187         return CGF.getTypeSize(BaseTy);
7188 
7189       llvm::Value *ElemSize;
7190       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7191         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7192       } else {
7193         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7194         assert(ATy && "Expecting array type if not a pointer type.");
7195         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7196       }
7197 
7198       // If we don't have a length at this point, that is because we have an
7199       // array section with a single element.
7200       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7201         return ElemSize;
7202 
7203       if (const Expr *LenExpr = OAE->getLength()) {
7204         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7205         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7206                                              CGF.getContext().getSizeType(),
7207                                              LenExpr->getExprLoc());
7208         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7209       }
7210       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7211              OAE->getLowerBound() && "expected array_section[lb:].");
7212       // Size = sizetype - lb * elemtype;
7213       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7214       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7215       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7216                                        CGF.getContext().getSizeType(),
7217                                        OAE->getLowerBound()->getExprLoc());
7218       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7219       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7220       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7221       LengthVal = CGF.Builder.CreateSelect(
7222           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7223       return LengthVal;
7224     }
7225     return CGF.getTypeSize(ExprTy);
7226   }
7227 
7228   /// Return the corresponding bits for a given map clause modifier. Add
7229   /// a flag marking the map as a pointer if requested. Add a flag marking the
7230   /// map as the first one of a series of maps that relate to the same map
7231   /// expression.
getMapTypeBits(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,bool IsImplicit,bool AddPtrFlag,bool AddIsTargetParamFlag) const7232   OpenMPOffloadMappingFlags getMapTypeBits(
7233       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7234       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7235     OpenMPOffloadMappingFlags Bits =
7236         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7237     switch (MapType) {
7238     case OMPC_MAP_alloc:
7239     case OMPC_MAP_release:
7240       // alloc and release is the default behavior in the runtime library,  i.e.
7241       // if we don't pass any bits alloc/release that is what the runtime is
7242       // going to do. Therefore, we don't need to signal anything for these two
7243       // type modifiers.
7244       break;
7245     case OMPC_MAP_to:
7246       Bits |= OMP_MAP_TO;
7247       break;
7248     case OMPC_MAP_from:
7249       Bits |= OMP_MAP_FROM;
7250       break;
7251     case OMPC_MAP_tofrom:
7252       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7253       break;
7254     case OMPC_MAP_delete:
7255       Bits |= OMP_MAP_DELETE;
7256       break;
7257     case OMPC_MAP_unknown:
7258       llvm_unreachable("Unexpected map type!");
7259     }
7260     if (AddPtrFlag)
7261       Bits |= OMP_MAP_PTR_AND_OBJ;
7262     if (AddIsTargetParamFlag)
7263       Bits |= OMP_MAP_TARGET_PARAM;
7264     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7265         != MapModifiers.end())
7266       Bits |= OMP_MAP_ALWAYS;
7267     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7268         != MapModifiers.end())
7269       Bits |= OMP_MAP_CLOSE;
7270     return Bits;
7271   }
7272 
7273   /// Return true if the provided expression is a final array section. A
7274   /// final array section, is one whose length can't be proved to be one.
isFinalArraySectionExpression(const Expr * E) const7275   bool isFinalArraySectionExpression(const Expr *E) const {
7276     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7277 
7278     // It is not an array section and therefore not a unity-size one.
7279     if (!OASE)
7280       return false;
7281 
7282     // An array section with no colon always refer to a single element.
7283     if (OASE->getColonLocFirst().isInvalid())
7284       return false;
7285 
7286     const Expr *Length = OASE->getLength();
7287 
7288     // If we don't have a length we have to check if the array has size 1
7289     // for this dimension. Also, we should always expect a length if the
7290     // base type is pointer.
7291     if (!Length) {
7292       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7293                              OASE->getBase()->IgnoreParenImpCasts())
7294                              .getCanonicalType();
7295       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7296         return ATy->getSize().getSExtValue() != 1;
7297       // If we don't have a constant dimension length, we have to consider
7298       // the current section as having any size, so it is not necessarily
7299       // unitary. If it happen to be unity size, that's user fault.
7300       return true;
7301     }
7302 
7303     // Check if the length evaluates to 1.
7304     Expr::EvalResult Result;
7305     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7306       return true; // Can have more that size 1.
7307 
7308     llvm::APSInt ConstLength = Result.Val.getInt();
7309     return ConstLength.getSExtValue() != 1;
7310   }
7311 
7312   /// Generate the base pointers, section pointers, sizes and map type
7313   /// bits for the provided map type, map modifier, and expression components.
7314   /// \a IsFirstComponent should be set to true if the provided set of
7315   /// components is the first associated with a capture.
generateInfoForComponentList(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,OMPClauseMappableExprCommon::MappableExprComponentListRef Components,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,StructRangeInfoTy & PartialStruct,bool IsFirstComponentList,bool IsImplicit,bool ForDeviceAddr=false,ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedElements=llvm::None) const7316   void generateInfoForComponentList(
7317       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7318       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7319       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7320       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7321       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7322       bool IsImplicit, bool ForDeviceAddr = false,
7323       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7324           OverlappedElements = llvm::None) const {
7325     // The following summarizes what has to be generated for each map and the
7326     // types below. The generated information is expressed in this order:
7327     // base pointer, section pointer, size, flags
7328     // (to add to the ones that come from the map type and modifier).
7329     //
7330     // double d;
7331     // int i[100];
7332     // float *p;
7333     //
7334     // struct S1 {
7335     //   int i;
7336     //   float f[50];
7337     // }
7338     // struct S2 {
7339     //   int i;
7340     //   float f[50];
7341     //   S1 s;
7342     //   double *p;
7343     //   struct S2 *ps;
7344     // }
7345     // S2 s;
7346     // S2 *ps;
7347     //
7348     // map(d)
7349     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7350     //
7351     // map(i)
7352     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7353     //
7354     // map(i[1:23])
7355     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7356     //
7357     // map(p)
7358     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7359     //
7360     // map(p[1:24])
7361     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7362     //
7363     // map(s)
7364     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7365     //
7366     // map(s.i)
7367     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7368     //
7369     // map(s.s.f)
7370     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7371     //
7372     // map(s.p)
7373     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7374     //
7375     // map(to: s.p[:22])
7376     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7377     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7378     // &(s.p), &(s.p[0]), 22*sizeof(double),
7379     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7380     // (*) alloc space for struct members, only this is a target parameter
7381     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7382     //      optimizes this entry out, same in the examples below)
7383     // (***) map the pointee (map: to)
7384     //
7385     // map(s.ps)
7386     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7387     //
7388     // map(from: s.ps->s.i)
7389     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7390     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7391     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7392     //
7393     // map(to: s.ps->ps)
7394     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7395     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7396     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7397     //
7398     // map(s.ps->ps->ps)
7399     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7400     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7401     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7402     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7403     //
7404     // map(to: s.ps->ps->s.f[:22])
7405     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7406     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7407     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7408     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7409     //
7410     // map(ps)
7411     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7412     //
7413     // map(ps->i)
7414     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7415     //
7416     // map(ps->s.f)
7417     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7418     //
7419     // map(from: ps->p)
7420     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7421     //
7422     // map(to: ps->p[:22])
7423     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7424     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7425     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7426     //
7427     // map(ps->ps)
7428     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7429     //
7430     // map(from: ps->ps->s.i)
7431     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7432     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7433     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7434     //
7435     // map(from: ps->ps->ps)
7436     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7437     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7438     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7439     //
7440     // map(ps->ps->ps->ps)
7441     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7442     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7443     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7444     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7445     //
7446     // map(to: ps->ps->ps->s.f[:22])
7447     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7448     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7449     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7450     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7451     //
7452     // map(to: s.f[:22]) map(from: s.p[:33])
7453     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7454     //     sizeof(double*) (**), TARGET_PARAM
7455     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7456     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7457     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7458     // (*) allocate contiguous space needed to fit all mapped members even if
7459     //     we allocate space for members not mapped (in this example,
7460     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7461     //     them as well because they fall between &s.f[0] and &s.p)
7462     //
7463     // map(from: s.f[:22]) map(to: ps->p[:33])
7464     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7465     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7466     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7467     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7468     // (*) the struct this entry pertains to is the 2nd element in the list of
7469     //     arguments, hence MEMBER_OF(2)
7470     //
7471     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7472     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7473     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7474     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7475     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7476     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7477     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7478     // (*) the struct this entry pertains to is the 4th element in the list
7479     //     of arguments, hence MEMBER_OF(4)
7480 
7481     // Track if the map information being generated is the first for a capture.
7482     bool IsCaptureFirstInfo = IsFirstComponentList;
7483     // When the variable is on a declare target link or in a to clause with
7484     // unified memory, a reference is needed to hold the host/device address
7485     // of the variable.
7486     bool RequiresReference = false;
7487 
7488     // Scan the components from the base to the complete expression.
7489     auto CI = Components.rbegin();
7490     auto CE = Components.rend();
7491     auto I = CI;
7492 
7493     // Track if the map information being generated is the first for a list of
7494     // components.
7495     bool IsExpressionFirstInfo = true;
7496     Address BP = Address::invalid();
7497     const Expr *AssocExpr = I->getAssociatedExpression();
7498     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7499     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7500     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7501 
7502     if (isa<MemberExpr>(AssocExpr)) {
7503       // The base is the 'this' pointer. The content of the pointer is going
7504       // to be the base of the field being mapped.
7505       BP = CGF.LoadCXXThisAddress();
7506     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7507                (OASE &&
7508                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7509       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7510     } else if (OAShE &&
7511                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7512       BP = Address(
7513           CGF.EmitScalarExpr(OAShE->getBase()),
7514           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7515     } else {
7516       // The base is the reference to the variable.
7517       // BP = &Var.
7518       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7519       if (const auto *VD =
7520               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7521         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7522                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7523           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7524               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7525                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7526             RequiresReference = true;
7527             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7528           }
7529         }
7530       }
7531 
7532       // If the variable is a pointer and is being dereferenced (i.e. is not
7533       // the last component), the base has to be the pointer itself, not its
7534       // reference. References are ignored for mapping purposes.
7535       QualType Ty =
7536           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7537       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7538         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7539 
7540         // We do not need to generate individual map information for the
7541         // pointer, it can be associated with the combined storage.
7542         ++I;
7543       }
7544     }
7545 
7546     // Track whether a component of the list should be marked as MEMBER_OF some
7547     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7548     // in a component list should be marked as MEMBER_OF, all subsequent entries
7549     // do not belong to the base struct. E.g.
7550     // struct S2 s;
7551     // s.ps->ps->ps->f[:]
7552     //   (1) (2) (3) (4)
7553     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7554     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7555     // is the pointee of ps(2) which is not member of struct s, so it should not
7556     // be marked as such (it is still PTR_AND_OBJ).
7557     // The variable is initialized to false so that PTR_AND_OBJ entries which
7558     // are not struct members are not considered (e.g. array of pointers to
7559     // data).
7560     bool ShouldBeMemberOf = false;
7561 
7562     // Variable keeping track of whether or not we have encountered a component
7563     // in the component list which is a member expression. Useful when we have a
7564     // pointer or a final array section, in which case it is the previous
7565     // component in the list which tells us whether we have a member expression.
7566     // E.g. X.f[:]
7567     // While processing the final array section "[:]" it is "f" which tells us
7568     // whether we are dealing with a member of a declared struct.
7569     const MemberExpr *EncounteredME = nullptr;
7570 
7571     for (; I != CE; ++I) {
7572       // If the current component is member of a struct (parent struct) mark it.
7573       if (!EncounteredME) {
7574         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7575         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7576         // as MEMBER_OF the parent struct.
7577         if (EncounteredME)
7578           ShouldBeMemberOf = true;
7579       }
7580 
7581       auto Next = std::next(I);
7582 
7583       // We need to generate the addresses and sizes if this is the last
7584       // component, if the component is a pointer or if it is an array section
7585       // whose length can't be proved to be one. If this is a pointer, it
7586       // becomes the base address for the following components.
7587 
7588       // A final array section, is one whose length can't be proved to be one.
7589       bool IsFinalArraySection =
7590           isFinalArraySectionExpression(I->getAssociatedExpression());
7591 
7592       // Get information on whether the element is a pointer. Have to do a
7593       // special treatment for array sections given that they are built-in
7594       // types.
7595       const auto *OASE =
7596           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7597       const auto *OAShE =
7598           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7599       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7600       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7601       bool IsPointer =
7602           OAShE ||
7603           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7604                        .getCanonicalType()
7605                        ->isAnyPointerType()) ||
7606           I->getAssociatedExpression()->getType()->isAnyPointerType();
7607       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7608 
7609       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7610         // If this is not the last component, we expect the pointer to be
7611         // associated with an array expression or member expression.
7612         assert((Next == CE ||
7613                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7614                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7615                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7616                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7617                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7618                "Unexpected expression");
7619 
7620         Address LB = Address::invalid();
7621         if (OAShE) {
7622           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7623                        CGF.getContext().getTypeAlignInChars(
7624                            OAShE->getBase()->getType()));
7625         } else {
7626           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7627                    .getAddress(CGF);
7628         }
7629 
7630         // If this component is a pointer inside the base struct then we don't
7631         // need to create any entry for it - it will be combined with the object
7632         // it is pointing to into a single PTR_AND_OBJ entry.
7633         bool IsMemberPointerOrAddr =
7634             (IsPointer || ForDeviceAddr) && EncounteredME &&
7635             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7636              EncounteredME);
7637         if (!OverlappedElements.empty()) {
7638           // Handle base element with the info for overlapped elements.
7639           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7640           assert(Next == CE &&
7641                  "Expected last element for the overlapped elements.");
7642           assert(!IsPointer &&
7643                  "Unexpected base element with the pointer type.");
7644           // Mark the whole struct as the struct that requires allocation on the
7645           // device.
7646           PartialStruct.LowestElem = {0, LB};
7647           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7648               I->getAssociatedExpression()->getType());
7649           Address HB = CGF.Builder.CreateConstGEP(
7650               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7651                                                               CGF.VoidPtrTy),
7652               TypeSize.getQuantity() - 1);
7653           PartialStruct.HighestElem = {
7654               std::numeric_limits<decltype(
7655                   PartialStruct.HighestElem.first)>::max(),
7656               HB};
7657           PartialStruct.Base = BP;
7658           // Emit data for non-overlapped data.
7659           OpenMPOffloadMappingFlags Flags =
7660               OMP_MAP_MEMBER_OF |
7661               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7662                              /*AddPtrFlag=*/false,
7663                              /*AddIsTargetParamFlag=*/false);
7664           LB = BP;
7665           llvm::Value *Size = nullptr;
7666           // Do bitcopy of all non-overlapped structure elements.
7667           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7668                    Component : OverlappedElements) {
7669             Address ComponentLB = Address::invalid();
7670             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7671                  Component) {
7672               if (MC.getAssociatedDeclaration()) {
7673                 ComponentLB =
7674                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7675                         .getAddress(CGF);
7676                 Size = CGF.Builder.CreatePtrDiff(
7677                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7678                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7679                 break;
7680               }
7681             }
7682             BasePointers.push_back(BP.getPointer());
7683             Pointers.push_back(LB.getPointer());
7684             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7685                                                       /*isSigned=*/true));
7686             Types.push_back(Flags);
7687             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7688           }
7689           BasePointers.push_back(BP.getPointer());
7690           Pointers.push_back(LB.getPointer());
7691           Size = CGF.Builder.CreatePtrDiff(
7692               CGF.EmitCastToVoidPtr(
7693                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7694               CGF.EmitCastToVoidPtr(LB.getPointer()));
7695           Sizes.push_back(
7696               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7697           Types.push_back(Flags);
7698           break;
7699         }
7700         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7701         if (!IsMemberPointerOrAddr) {
7702           BasePointers.push_back(BP.getPointer());
7703           Pointers.push_back(LB.getPointer());
7704           Sizes.push_back(
7705               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7706 
7707           // We need to add a pointer flag for each map that comes from the
7708           // same expression except for the first one. We also need to signal
7709           // this map is the first one that relates with the current capture
7710           // (there is a set of entries for each capture).
7711           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7712               MapType, MapModifiers, IsImplicit,
7713               !IsExpressionFirstInfo || RequiresReference,
7714               IsCaptureFirstInfo && !RequiresReference);
7715 
7716           if (!IsExpressionFirstInfo) {
7717             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7718             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7719             if (IsPointer)
7720               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7721                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7722 
7723             if (ShouldBeMemberOf) {
7724               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7725               // should be later updated with the correct value of MEMBER_OF.
7726               Flags |= OMP_MAP_MEMBER_OF;
7727               // From now on, all subsequent PTR_AND_OBJ entries should not be
7728               // marked as MEMBER_OF.
7729               ShouldBeMemberOf = false;
7730             }
7731           }
7732 
7733           Types.push_back(Flags);
7734         }
7735 
7736         // If we have encountered a member expression so far, keep track of the
7737         // mapped member. If the parent is "*this", then the value declaration
7738         // is nullptr.
7739         if (EncounteredME) {
7740           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7741           unsigned FieldIndex = FD->getFieldIndex();
7742 
7743           // Update info about the lowest and highest elements for this struct
7744           if (!PartialStruct.Base.isValid()) {
7745             PartialStruct.LowestElem = {FieldIndex, LB};
7746             if (IsFinalArraySection) {
7747               Address HB =
7748                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7749                       .getAddress(CGF);
7750               PartialStruct.HighestElem = {FieldIndex, HB};
7751             } else {
7752               PartialStruct.HighestElem = {FieldIndex, LB};
7753             }
7754             PartialStruct.Base = BP;
7755           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7756             PartialStruct.LowestElem = {FieldIndex, LB};
7757           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7758             PartialStruct.HighestElem = {FieldIndex, LB};
7759           }
7760         }
7761 
7762         // If we have a final array section, we are done with this expression.
7763         if (IsFinalArraySection)
7764           break;
7765 
7766         // The pointer becomes the base for the next element.
7767         if (Next != CE)
7768           BP = LB;
7769 
7770         IsExpressionFirstInfo = false;
7771         IsCaptureFirstInfo = false;
7772       }
7773     }
7774   }
7775 
7776   /// Return the adjusted map modifiers if the declaration a capture refers to
7777   /// appears in a first-private clause. This is expected to be used only with
7778   /// directives that start with 'target'.
7779   MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture & Cap) const7780   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7781     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7782 
7783     // A first private variable captured by reference will use only the
7784     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7785     // declaration is known as first-private in this handler.
7786     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7787       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7788           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7789         return MappableExprsHandler::OMP_MAP_ALWAYS |
7790                MappableExprsHandler::OMP_MAP_TO;
7791       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7792         return MappableExprsHandler::OMP_MAP_TO |
7793                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7794       return MappableExprsHandler::OMP_MAP_PRIVATE |
7795              MappableExprsHandler::OMP_MAP_TO;
7796     }
7797     return MappableExprsHandler::OMP_MAP_TO |
7798            MappableExprsHandler::OMP_MAP_FROM;
7799   }
7800 
getMemberOfFlag(unsigned Position)7801   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7802     // Rotate by getFlagMemberOffset() bits.
7803     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7804                                                   << getFlagMemberOffset());
7805   }
7806 
setCorrectMemberOfFlag(OpenMPOffloadMappingFlags & Flags,OpenMPOffloadMappingFlags MemberOfFlag)7807   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7808                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7809     // If the entry is PTR_AND_OBJ but has not been marked with the special
7810     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7811     // marked as MEMBER_OF.
7812     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7813         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7814       return;
7815 
7816     // Reset the placeholder value to prepare the flag for the assignment of the
7817     // proper MEMBER_OF value.
7818     Flags &= ~OMP_MAP_MEMBER_OF;
7819     Flags |= MemberOfFlag;
7820   }
7821 
getPlainLayout(const CXXRecordDecl * RD,llvm::SmallVectorImpl<const FieldDecl * > & Layout,bool AsBase) const7822   void getPlainLayout(const CXXRecordDecl *RD,
7823                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7824                       bool AsBase) const {
7825     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7826 
7827     llvm::StructType *St =
7828         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7829 
7830     unsigned NumElements = St->getNumElements();
7831     llvm::SmallVector<
7832         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7833         RecordLayout(NumElements);
7834 
7835     // Fill bases.
7836     for (const auto &I : RD->bases()) {
7837       if (I.isVirtual())
7838         continue;
7839       const auto *Base = I.getType()->getAsCXXRecordDecl();
7840       // Ignore empty bases.
7841       if (Base->isEmpty() || CGF.getContext()
7842                                  .getASTRecordLayout(Base)
7843                                  .getNonVirtualSize()
7844                                  .isZero())
7845         continue;
7846 
7847       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7848       RecordLayout[FieldIndex] = Base;
7849     }
7850     // Fill in virtual bases.
7851     for (const auto &I : RD->vbases()) {
7852       const auto *Base = I.getType()->getAsCXXRecordDecl();
7853       // Ignore empty bases.
7854       if (Base->isEmpty())
7855         continue;
7856       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7857       if (RecordLayout[FieldIndex])
7858         continue;
7859       RecordLayout[FieldIndex] = Base;
7860     }
7861     // Fill in all the fields.
7862     assert(!RD->isUnion() && "Unexpected union.");
7863     for (const auto *Field : RD->fields()) {
7864       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7865       // will fill in later.)
7866       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7867         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7868         RecordLayout[FieldIndex] = Field;
7869       }
7870     }
7871     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7872              &Data : RecordLayout) {
7873       if (Data.isNull())
7874         continue;
7875       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7876         getPlainLayout(Base, Layout, /*AsBase=*/true);
7877       else
7878         Layout.push_back(Data.get<const FieldDecl *>());
7879     }
7880   }
7881 
7882 public:
MappableExprsHandler(const OMPExecutableDirective & Dir,CodeGenFunction & CGF)7883   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7884       : CurDir(&Dir), CGF(CGF) {
7885     // Extract firstprivate clause information.
7886     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7887       for (const auto *D : C->varlists())
7888         FirstPrivateDecls.try_emplace(
7889             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7890     // Extract implicit firstprivates from uses_allocators clauses.
7891     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7892       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7893         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7894         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7895           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7896                                         /*Implicit=*/true);
7897         else if (const auto *VD = dyn_cast<VarDecl>(
7898                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7899                          ->getDecl()))
7900           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7901       }
7902     }
7903     // Extract device pointer clause information.
7904     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7905       for (auto L : C->component_lists())
7906         DevPointersMap[L.first].push_back(L.second);
7907   }
7908 
  /// Constructor for the declare mapper directive. Only records the directive
  /// and the CodeGenFunction; no clause information is pre-extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7912 
7913   /// Generate code for the combined entry if we have a partially mapped struct
7914   /// and take care of the mapping flags of the arguments corresponding to
7915   /// individual struct members.
emitCombinedEntry(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,MapFlagsArrayTy & CurTypes,const StructRangeInfoTy & PartialStruct) const7916   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7917                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7918                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7919                          const StructRangeInfoTy &PartialStruct) const {
7920     // Base is the base of the struct
7921     BasePointers.push_back(PartialStruct.Base.getPointer());
7922     // Pointer is the address of the lowest element
7923     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7924     Pointers.push_back(LB);
7925     // Size is (addr of {highest+1} element) - (addr of lowest element)
7926     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7927     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7928     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7929     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7930     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7931     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7932                                                   /*isSigned=*/false);
7933     Sizes.push_back(Size);
7934     // Map type is always TARGET_PARAM
7935     Types.push_back(OMP_MAP_TARGET_PARAM);
7936     // Remove TARGET_PARAM flag from the first element
7937     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7938 
7939     // All other current entries will be MEMBER_OF the combined entry
7940     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7941     // 0xFFFF in the MEMBER_OF field).
7942     OpenMPOffloadMappingFlags MemberOfFlag =
7943         getMemberOfFlag(BasePointers.size() - 1);
7944     for (auto &M : CurTypes)
7945       setCorrectMemberOfFlag(M, MemberOfFlag);
7946   }
7947 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Lists are keyed on the canonical declaration; a null key stands
    // for a member of 'this'.
    auto &&InfoGen =
        [&Info](const ValueDecl *D,
                OMPClauseMappableExprCommon::MappableExprComponentListRef L,
                OpenMPMapClauseKind MapType,
                ArrayRef<OpenMPMapModifierKind> MapModifiers,
                bool ReturnDevicePointer, bool IsImplicit,
                bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                                IsImplicit, ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // 'map' clauses carry their own map type and modifiers; 'to'/'from'
    // motion clauses imply the corresponding map type with no modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Emit the entry right away: the device pointer value itself with a
          // zero size and the RETURN_PARAM flag so it is handed back to the
          // user code.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    // Unlike use_device_ptr, each declaration is only handled once here.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(),
                  /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // use_device_addr takes the address (lvalue) rather than loading a
          // pointer value, unless the expression is not a glvalue.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Process all the gathered component lists, one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays, accumulated per declaration and
      // appended to the output arrays at the end of the iteration.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                               OMP_MAP_MEMBER_OF);
          }
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8205 
  /// Generate all the base pointers, section pointers, sizes and map types for
  /// the extracted map clauses of user-defined mapper.
  /// Results are appended to \p BasePointers, \p Pointers, \p Sizes and
  /// \p Types (one entry per generated map).
  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                                MapValuesArrayTy &Pointers,
                                MapValuesArrayTy &Sizes,
                                MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      // Bucket by the canonical declaration (nullptr when there is no
      // associated declaration) so all lists for one entity are adjacent.
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // A declare mapper directive carries only map clauses.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto L : MC->component_lists()) {
        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, MC->isImplicit());
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8277 
  /// Emit capture info for lambdas for variables captured by reference.
  /// \p VD is the captured variable (expected to be of lambda type), \p Arg
  /// its address on the host. One PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT map
  /// entry is appended per captured 'this'/by-ref/pointer capture, and
  /// \p LambdaPointers records which lambda each entry belongs to so the
  /// MEMBER_OF index can be patched later (adjustMemberOfForLambdaCaptures).
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects get this treatment; bail out otherwise.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the lambda's capture fields.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Base is the 'this' field inside the lambda; pointer is the captured
      // object itself.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-ref capture: map the referenced storage with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map the pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8341 
8342   /// Set correct indices for lambdas captures.
adjustMemberOfForLambdaCaptures(const llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapFlagsArrayTy & Types) const8343   void adjustMemberOfForLambdaCaptures(
8344       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8345       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8346       MapFlagsArrayTy &Types) const {
8347     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8348       // Set correct member_of idx for all implicit lambda captures.
8349       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8350                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8351         continue;
8352       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8353       assert(BasePtr && "Unable to find base lambda address.");
8354       int TgtIdx = -1;
8355       for (unsigned J = I; J > 0; --J) {
8356         unsigned Idx = J - 1;
8357         if (Pointers[Idx] != BasePtr)
8358           continue;
8359         TgtIdx = Idx;
8360         break;
8361       }
8362       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8363       // All other current entries will be MEMBER_OF the combined entry
8364       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8365       // 0xFFFF in the MEMBER_OF field).
8366       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8367       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8368     }
8369   }
8370 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  /// \p Cap is the capture ('this' or a variable), \p Arg the corresponding
  /// host value. Results are appended to the four output arrays; struct
  /// members mapped individually are summarized in \p PartialStruct.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // 'this' captures have no associated declaration (VD == nullptr).
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every map-clause component list that refers to this declaration.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // For each base list, record the component lists that extend it (i.e.
    // those that share a common prefix of components with it).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare against every later list (each unordered pair once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists from the innermost component outwards.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter (exhausted) list is the base; the other one is the
          // overlapping sub-list.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout gives declaration order of fields, used to order overlaps that
    // live in different (e.g. base-class) records.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of the two component lists.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: same record → field index,
            // different records → whichever field appears first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
          Types, PartialStruct, IsFirstComponentList, IsImplicit,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8548 
8549   /// Generate the base pointers, section pointers, sizes and map types
8550   /// associated with the declare target link variables.
generateInfoForDeclareTargetLink(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const8551   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8552                                         MapValuesArrayTy &Pointers,
8553                                         MapValuesArrayTy &Sizes,
8554                                         MapFlagsArrayTy &Types) const {
8555     assert(CurDir.is<const OMPExecutableDirective *>() &&
8556            "Expect a executable directive");
8557     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8558     // Map other list items in the map clause which are not captured variables
8559     // but "declare target link" global variables.
8560     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8561       for (const auto L : C->component_lists()) {
8562         if (!L.first)
8563           continue;
8564         const auto *VD = dyn_cast<VarDecl>(L.first);
8565         if (!VD)
8566           continue;
8567         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8568             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8569         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8570             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8571           continue;
8572         StructRangeInfoTy PartialStruct;
8573         generateInfoForComponentList(
8574             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8575             Pointers, Sizes, Types, PartialStruct,
8576             /*IsFirstComponentList=*/true, C->isImplicit());
8577         assert(!PartialStruct.Base.isValid() &&
8578                "No partial structs for declare target link expected.");
8579       }
8580     }
8581   }
8582 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  /// Appends exactly one entry to each of the four output arrays, flagged as
  /// a target parameter (and as implicit unless a firstprivate clause says
  /// otherwise).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object with tofrom semantics.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause may override the implicitness of the map.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variable: materialize a global copy once and
        // map that instead of the original.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: pass the pointee
          // address, not the address of the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8666 };
8667 } // anonymous namespace
8668 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
/// Allocates (or emits as constant globals where possible) the
/// .offload_baseptrs/.offload_ptrs/.offload_sizes/.offload_maptypes arrays
/// and stores their addresses into \p Info.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers and pointers always hold runtime values, so they are
    // emitted as stack temporaries filled below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer, pointer and (if runtime-sized) size arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where use_device_ptr/use_device_addr declarations live.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8787 
8788 /// Emit the arguments to be passed to the runtime library based on the
8789 /// arrays of pointers, sizes and map types.
emitOffloadingArraysArgument(CodeGenFunction & CGF,llvm::Value * & BasePointersArrayArg,llvm::Value * & PointersArrayArg,llvm::Value * & SizesArrayArg,llvm::Value * & MapTypesArrayArg,CGOpenMPRuntime::TargetDataInfo & Info)8790 static void emitOffloadingArraysArgument(
8791     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8792     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8793     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8794   CodeGenModule &CGM = CGF.CGM;
8795   if (Info.NumberOfPtrs) {
8796     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8797         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8798         Info.BasePointersArray,
8799         /*Idx0=*/0, /*Idx1=*/0);
8800     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8801         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8802         Info.PointersArray,
8803         /*Idx0=*/0,
8804         /*Idx1=*/0);
8805     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8806         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8807         /*Idx0=*/0, /*Idx1=*/0);
8808     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8809         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8810         Info.MapTypesArray,
8811         /*Idx0=*/0,
8812         /*Idx1=*/0);
8813   } else {
8814     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8815     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8816     unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
8817     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo(DefaultAS));
8818     MapTypesArrayArg =
8819         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo(DefaultAS));
8820   }
8821 }
8822 
/// Check for inner distribute directive.
///
/// Given a target-related directive \p D, look through its innermost captured
/// statement for a directly nested distribute directive — either immediately
/// below \p D, or one level deeper below an intervening 'teams' directive.
/// \returns the nested distribute directive, or nullptr if there is none.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  // Strip implicit captures and containers to reach the directive's body.
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may have 'distribute' directly nested, or nested one level
      // deeper below a 'teams' directive.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' may only have 'distribute' directly nested.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot legally enclose 'distribute'.
      return nullptr;
    // All remaining directive kinds are not valid enclosing directives here;
    // the cases are spelled out (rather than folded into 'default') so that
    // -Wswitch flags newly added directive kinds.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8929 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper function at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // Restrict-qualified pointer to the mapped type; used below to privatize
  // the declared mapper variable as the current array element.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The mapper is named after the mangled type name and the mapper id.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Keep the generated mapper body optimizable even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; second incoming value (the advanced
  // pointer) is added after the loop latch is emitted below.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position of the map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the mapper so later references reuse this function, and remember
  // which function it was requested from (if any) for bookkeeping.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9210 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
/// Control continues at \a ExitBB in every case.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // size >= 1 means an array section is being mapped; otherwise skip.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization only happens when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion only happens when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9266 
emitTargetNumIterationsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Value * DeviceID,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)9267 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9268     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9269     llvm::Value *DeviceID,
9270     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9271                                      const OMPLoopDirective &D)>
9272         SizeEmitter) {
9273   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9274   const OMPExecutableDirective *TD = &D;
9275   // Get nested teams distribute kind directive, if any.
9276   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9277     TD = getNestedDistributeDirective(CGM.getContext(), D);
9278   if (!TD)
9279     return;
9280   const auto *LD = cast<OMPLoopDirective>(TD);
9281   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9282                                                      PrePostActionTy &) {
9283     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9284       llvm::Value *Args[] = {DeviceID, NumIterations};
9285       CGF.EmitRuntimeCall(
9286           OMPBuilder.getOrCreateRuntimeFunction(
9287               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9288           Args);
9289     }
9290   };
9291   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9292 }
9293 
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Function * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)9294 void CGOpenMPRuntime::emitTargetCall(
9295     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9296     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9297     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9298     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9299                                      const OMPLoopDirective &D)>
9300         SizeEmitter) {
9301   if (!CGF.HaveInsertPoint())
9302     return;
9303 
9304   assert(OutlinedFn && "Invalid outlined function!");
9305 
9306   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9307   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9308   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9309   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9310                                             PrePostActionTy &) {
9311     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9312   };
9313   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9314 
9315   CodeGenFunction::OMPTargetDataInfo InputInfo;
9316   llvm::Value *MapTypesArray = nullptr;
9317   // Fill up the pointer arrays and transfer execution to the device.
9318   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9319                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9320                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9321     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9322       // Reverse offloading is not supported, so just execute on the host.
9323       if (RequiresOuterTask) {
9324         CapturedVars.clear();
9325         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9326       }
9327       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9328       return;
9329     }
9330 
9331     // On top of the arrays that were filled up, the target offloading call
9332     // takes as arguments the device id as well as the host pointer. The host
9333     // pointer is used by the runtime library to identify the current target
9334     // region, so it only has to be unique and not necessarily point to
9335     // anything. It could be the pointer to the outlined function that
9336     // implements the target region, but we aren't using that so that the
9337     // compiler doesn't need to keep that, and could therefore inline the host
9338     // function if proven worthwhile during optimization.
9339 
9340     // From this point on, we need to have an ID of the target region defined.
9341     assert(OutlinedFnID && "Invalid outlined function ID!");
9342 
9343     // Emit device ID if any.
9344     llvm::Value *DeviceID;
9345     if (Device.getPointer()) {
9346       assert((Device.getInt() == OMPC_DEVICE_unknown ||
9347               Device.getInt() == OMPC_DEVICE_device_num) &&
9348              "Expected device_num modifier.");
9349       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9350       DeviceID =
9351           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9352     } else {
9353       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9354     }
9355 
9356     // Emit the number of elements in the offloading arrays.
9357     llvm::Value *PointerNum =
9358         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9359 
9360     // Return value of the runtime offloading call.
9361     llvm::Value *Return;
9362 
9363     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9364     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9365 
9366     // Emit tripcount for the target loop-based directive.
9367     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9368 
9369     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9370     // The target region is an outlined function launched by the runtime
9371     // via calls __tgt_target() or __tgt_target_teams().
9372     //
9373     // __tgt_target() launches a target region with one team and one thread,
9374     // executing a serial region.  This master thread may in turn launch
9375     // more threads within its team upon encountering a parallel region,
9376     // however, no additional teams can be launched on the device.
9377     //
9378     // __tgt_target_teams() launches a target region with one or more teams,
9379     // each with one or more threads.  This call is required for target
9380     // constructs such as:
9381     //  'target teams'
9382     //  'target' / 'teams'
9383     //  'target teams distribute parallel for'
9384     //  'target parallel'
9385     // and so on.
9386     //
9387     // Note that on the host and CPU targets, the runtime implementation of
9388     // these calls simply call the outlined function without forking threads.
9389     // The outlined functions themselves have runtime calls to
9390     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9391     // the compiler in emitTeamsCall() and emitParallelCall().
9392     //
9393     // In contrast, on the NVPTX target, the implementation of
9394     // __tgt_target_teams() launches a GPU kernel with the requested number
9395     // of teams and threads so no additional calls to the runtime are required.
9396     if (NumTeams) {
9397       // If we have NumTeams defined this means that we have an enclosed teams
9398       // region. Therefore we also expect to have NumThreads defined. These two
9399       // values should be defined in the presence of a teams directive,
9400       // regardless of having any clauses associated. If the user is using teams
9401       // but no clauses, these two values will be the default that should be
9402       // passed to the runtime library - a 32-bit integer with the value zero.
9403       assert(NumThreads && "Thread limit expression should be available along "
9404                            "with number of teams.");
9405       llvm::Value *OffloadingArgs[] = {DeviceID,
9406                                        OutlinedFnID,
9407                                        PointerNum,
9408                                        InputInfo.BasePointersArray.getPointer(),
9409                                        InputInfo.PointersArray.getPointer(),
9410                                        InputInfo.SizesArray.getPointer(),
9411                                        MapTypesArray,
9412                                        NumTeams,
9413                                        NumThreads};
9414       Return = CGF.EmitRuntimeCall(
9415           OMPBuilder.getOrCreateRuntimeFunction(
9416               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
9417                                          : OMPRTL___tgt_target_teams),
9418           OffloadingArgs);
9419     } else {
9420       llvm::Value *OffloadingArgs[] = {DeviceID,
9421                                        OutlinedFnID,
9422                                        PointerNum,
9423                                        InputInfo.BasePointersArray.getPointer(),
9424                                        InputInfo.PointersArray.getPointer(),
9425                                        InputInfo.SizesArray.getPointer(),
9426                                        MapTypesArray};
9427       Return = CGF.EmitRuntimeCall(
9428           OMPBuilder.getOrCreateRuntimeFunction(
9429               CGM.getModule(),
9430               HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
9431           OffloadingArgs);
9432     }
9433 
9434     // Check the error code and execute the host version if required.
9435     llvm::BasicBlock *OffloadFailedBlock =
9436         CGF.createBasicBlock("omp_offload.failed");
9437     llvm::BasicBlock *OffloadContBlock =
9438         CGF.createBasicBlock("omp_offload.cont");
9439     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9440     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9441 
9442     CGF.EmitBlock(OffloadFailedBlock);
9443     if (RequiresOuterTask) {
9444       CapturedVars.clear();
9445       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9446     }
9447     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9448     CGF.EmitBranch(OffloadContBlock);
9449 
9450     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9451   };
9452 
9453   // Notify that the host version must be executed.
9454   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9455                     RequiresOuterTask](CodeGenFunction &CGF,
9456                                        PrePostActionTy &) {
9457     if (RequiresOuterTask) {
9458       CapturedVars.clear();
9459       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9460     }
9461     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9462   };
9463 
9464   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9465                           &CapturedVars, RequiresOuterTask,
9466                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9467     // Fill up the arrays with all the captured variables.
9468     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9469     MappableExprsHandler::MapValuesArrayTy Pointers;
9470     MappableExprsHandler::MapValuesArrayTy Sizes;
9471     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9472 
9473     // Get mappable expression information.
9474     MappableExprsHandler MEHandler(D, CGF);
9475     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9476 
9477     auto RI = CS.getCapturedRecordDecl()->field_begin();
9478     auto CV = CapturedVars.begin();
9479     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9480                                               CE = CS.capture_end();
9481          CI != CE; ++CI, ++RI, ++CV) {
9482       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9483       MappableExprsHandler::MapValuesArrayTy CurPointers;
9484       MappableExprsHandler::MapValuesArrayTy CurSizes;
9485       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9486       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9487 
9488       // VLA sizes are passed to the outlined region by copy and do not have map
9489       // information associated.
9490       if (CI->capturesVariableArrayType()) {
9491         CurBasePointers.push_back(*CV);
9492         CurPointers.push_back(*CV);
9493         CurSizes.push_back(CGF.Builder.CreateIntCast(
9494             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9495         // Copy to the device as an argument. No need to retrieve it.
9496         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9497                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9498                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9499       } else {
9500         // If we have any information in the map clause, we use it, otherwise we
9501         // just do a default mapping.
9502         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9503                                          CurSizes, CurMapTypes, PartialStruct);
9504         if (CurBasePointers.empty())
9505           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9506                                            CurPointers, CurSizes, CurMapTypes);
9507         // Generate correct mapping for variables captured by reference in
9508         // lambdas.
9509         if (CI->capturesVariable())
9510           MEHandler.generateInfoForLambdaCaptures(
9511               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9512               CurMapTypes, LambdaPointers);
9513       }
9514       // We expect to have at least an element of information for this capture.
9515       assert(!CurBasePointers.empty() &&
9516              "Non-existing map pointer for capture!");
9517       assert(CurBasePointers.size() == CurPointers.size() &&
9518              CurBasePointers.size() == CurSizes.size() &&
9519              CurBasePointers.size() == CurMapTypes.size() &&
9520              "Inconsistent map information sizes!");
9521 
9522       // If there is an entry in PartialStruct it means we have a struct with
9523       // individual members mapped. Emit an extra combined entry.
9524       if (PartialStruct.Base.isValid())
9525         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9526                                     CurMapTypes, PartialStruct);
9527 
9528       // We need to append the results of this capture to what we already have.
9529       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9530       Pointers.append(CurPointers.begin(), CurPointers.end());
9531       Sizes.append(CurSizes.begin(), CurSizes.end());
9532       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9533     }
9534     // Adjust MEMBER_OF flags for the lambdas captures.
9535     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9536                                               Pointers, MapTypes);
9537     // Map other list items in the map clause which are not captured variables
9538     // but "declare target link" global variables.
9539     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9540                                                MapTypes);
9541 
9542     TargetDataInfo Info;
9543     // Fill up the arrays and create the arguments.
9544     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9545     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9546                                  Info.PointersArray, Info.SizesArray,
9547                                  Info.MapTypesArray, Info);
9548     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9549     InputInfo.BasePointersArray =
9550         Address(Info.BasePointersArray, CGM.getPointerAlign());
9551     InputInfo.PointersArray =
9552         Address(Info.PointersArray, CGM.getPointerAlign());
9553     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9554     MapTypesArray = Info.MapTypesArray;
9555     if (RequiresOuterTask)
9556       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9557     else
9558       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9559   };
9560 
9561   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9562                              CodeGenFunction &CGF, PrePostActionTy &) {
9563     if (RequiresOuterTask) {
9564       CodeGenFunction::OMPTargetDataInfo InputInfo;
9565       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9566     } else {
9567       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9568     }
9569   };
9570 
9571   // If we have a target function ID it means that we need to support
9572   // offloading; otherwise, just execute on the host. We need to execute on
9573   // the host regardless of the conditional in the if clause if, e.g., the
9574   // user does not specify target triples.
9575   if (OutlinedFnID) {
9576     if (IfCond) {
9577       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9578     } else {
9579       RegionCodeGenTy ThenRCG(TargetThenGen);
9580       ThenRCG(CGF);
9581     }
9582   } else {
9583     RegionCodeGenTy ElseRCG(TargetElseGen);
9584     ElseRCG(CGF);
9585   }
9586 }
9587 
/// Recursively scan \p S for OpenMP target execution directives and emit a
/// device entry point for each one found.
///
/// \param S Statement tree to scan; null is tolerated and ignored.
/// \param ParentName Mangled name of the enclosing host function (or
///        ctor/dtor), used by the offload-entry bookkeeping to build unique
///        names for the target regions discovered below it.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region across the host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter. Combined
    // directives each have a dedicated entry point.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // Every other directive kind cannot reach this switch:
    // RequiresDeviceCodegen guarantees a target execution directive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target executable directive may still contain target regions in its
  // associated statement; recurse into it.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9736 
emitTargetFunctions(GlobalDecl GD)9737 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9738   // If emitting code for the host, we do not process FD here. Instead we do
9739   // the normal code generation.
9740   if (!CGM.getLangOpts().OpenMPIsDevice) {
9741     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9742       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9743           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9744       // Do not emit device_type(nohost) functions for the host.
9745       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9746         return true;
9747     }
9748     return false;
9749   }
9750 
9751   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9752   // Try to detect target regions in the function.
9753   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9754     StringRef Name = CGM.getMangledName(GD);
9755     scanForTargetRegionsFunctions(FD->getBody(), Name);
9756     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9757         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9758     // Do not emit device_type(nohost) functions for the host.
9759     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9760       return true;
9761   }
9762 
9763   // Do not to emit function if it is not marked as declare target.
9764   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9765          AlreadyEmittedTargetDecls.count(VD) == 0;
9766 }
9767 
emitTargetGlobalVariable(GlobalDecl GD)9768 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9769   if (!CGM.getLangOpts().OpenMPIsDevice)
9770     return false;
9771 
9772   // Check if there are Ctors/Dtors in this declaration and look for target
9773   // regions in it. We use the complete variant to produce the kernel name
9774   // mangling.
9775   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9776   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9777     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9778       StringRef ParentName =
9779           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9780       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9781     }
9782     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9783       StringRef ParentName =
9784           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9785       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9786     }
9787   }
9788 
9789   // Do not to emit variable if it is not marked as declare target.
9790   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9791       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9792           cast<VarDecl>(GD.getDecl()));
9793   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9794       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9795        HasRequiresUnifiedSharedMemory)) {
9796     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9797     return true;
9798   }
9799   return false;
9800 }
9801 
9802 llvm::Constant *
registerTargetFirstprivateCopy(CodeGenFunction & CGF,const VarDecl * VD)9803 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9804                                                 const VarDecl *VD) {
9805   assert(VD->getType().isConstant(CGM.getContext()) &&
9806          "Expected constant variable.");
9807   StringRef VarName;
9808   llvm::Constant *Addr;
9809   llvm::GlobalValue::LinkageTypes Linkage;
9810   QualType Ty = VD->getType();
9811   SmallString<128> Buffer;
9812   {
9813     unsigned DeviceID;
9814     unsigned FileID;
9815     unsigned Line;
9816     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9817                              FileID, Line);
9818     llvm::raw_svector_ostream OS(Buffer);
9819     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9820        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9821     VarName = OS.str();
9822   }
9823   Linkage = llvm::GlobalValue::InternalLinkage;
9824   Addr =
9825       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9826                                   getDefaultFirstprivateAddressSpace());
9827   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9828   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9829   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9830   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9831       VarName, Addr, VarSize,
9832       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9833   return Addr;
9834 }
9835 
/// Register \p VD / \p Addr with the offload-entry bookkeeping so the
/// variable becomes visible to the offloading runtime. Handles both
/// declare-target variables and (on the device) non-target variables that get
/// emitted anyway.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading targets exist and this is a host
  // compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // declare target to (without unified shared memory): register the
    // variable itself, sized by its definition.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size zero signals "no definition in this TU".
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    // An internal constant "<name>_ref" global pointing at the variable is
    // added to llvm.compiler.used so the variable itself stays alive.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // declare target link, or declare target to combined with
    // `requires unified_shared_memory`: register the pointer-sized link/ref
    // variable instead of the data itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device only the name is registered; the address is resolved by
      // the runtime.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
9906 
emitTargetGlobal(GlobalDecl GD)9907 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9908   if (isa<FunctionDecl>(GD.getDecl()) ||
9909       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9910     return emitTargetFunctions(GD);
9911 
9912   return emitTargetGlobalVariable(GD);
9913 }
9914 
emitDeferredTargetDecls() const9915 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9916   for (const VarDecl *VD : DeferredGlobalVariables) {
9917     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9918         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9919     if (!Res)
9920       continue;
9921     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9922         !HasRequiresUnifiedSharedMemory) {
9923       CGM.EmitGlobal(VD);
9924     } else {
9925       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9926               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9927                HasRequiresUnifiedSharedMemory)) &&
9928              "Expected link clause or to clause with unified memory.");
9929       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9930     }
9931   }
9932 }
9933 
adjustTargetSpecificDataForLambdas(CodeGenFunction & CGF,const OMPExecutableDirective & D) const9934 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9935     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9936   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9937          " Expected target-based directive.");
9938 }
9939 
processRequiresDirective(const OMPRequiresDecl * D)9940 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9941   for (const OMPClause *Clause : D->clauselists()) {
9942     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9943       HasRequiresUnifiedSharedMemory = true;
9944     } else if (const auto *AC =
9945                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9946       switch (AC->getAtomicDefaultMemOrderKind()) {
9947       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9948         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9949         break;
9950       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9951         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9952         break;
9953       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9954         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9955         break;
9956       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9957         break;
9958       }
9959     }
9960   }
9961 }
9962 
/// Return the default atomic memory ordering for this TU, as recorded from
/// any `requires atomic_default_mem_order(...)` clause by
/// processRequiresDirective.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9966 
hasAllocateAttributeForGlobalVar(const VarDecl * VD,LangAS & AS)9967 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9968                                                        LangAS &AS) {
9969   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9970     return false;
9971   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9972   switch(A->getAllocatorType()) {
9973   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9974   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9975   // Not supported, fallback to the default mem space.
9976   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9977   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9978   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9979   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9980   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9981   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9982   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9983     AS = LangAS::Default;
9984     return true;
9985   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9986     llvm_unreachable("Expected predefined allocator for the variables with the "
9987                      "static storage.");
9988   }
9989   return false;
9990 }
9991 
/// Whether a `requires unified_shared_memory` clause has been seen in this
/// TU (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9995 
DisableAutoDeclareTargetRAII(CodeGenModule & CGM)9996 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9997     CodeGenModule &CGM)
9998     : CGM(CGM) {
9999   if (CGM.getLangOpts().OpenMPIsDevice) {
10000     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10001     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10002   }
10003 }
10004 
~DisableAutoDeclareTargetRAII()10005 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10006   if (CGM.getLangOpts().OpenMPIsDevice)
10007     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10008 }
10009 
markAsGlobalTarget(GlobalDecl GD)10010 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10011   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10012     return true;
10013 
10014   const auto *D = cast<FunctionDecl>(GD.getDecl());
10015   // Do not to emit function if it is marked as declare target as it was already
10016   // emitted.
10017   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10018     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10019       if (auto *F = dyn_cast_or_null<llvm::Function>(
10020               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10021         return !F->isDeclaration();
10022       return false;
10023     }
10024     return true;
10025   }
10026 
10027   return !AlreadyEmittedTargetDecls.insert(D).second;
10028 }
10029 
/// Create the global-init-style function that reports this TU's `requires`
/// clauses to the offload runtime via __tgt_register_requires, or return
/// null when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scope so CGF is destroyed after FinishFunction completes the body.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the single runtime call carrying the flags word.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10071 
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)10072 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10073                                     const OMPExecutableDirective &D,
10074                                     SourceLocation Loc,
10075                                     llvm::Function *OutlinedFn,
10076                                     ArrayRef<llvm::Value *> CapturedVars) {
10077   if (!CGF.HaveInsertPoint())
10078     return;
10079 
10080   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10081   CodeGenFunction::RunCleanupsScope Scope(CGF);
10082 
10083   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10084   llvm::Value *Args[] = {
10085       RTLoc,
10086       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10087       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10088   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10089   RealArgs.append(std::begin(Args), std::end(Args));
10090   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10091 
10092   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10093       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10094   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10095 }
10096 
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)10097 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10098                                          const Expr *NumTeams,
10099                                          const Expr *ThreadLimit,
10100                                          SourceLocation Loc) {
10101   if (!CGF.HaveInsertPoint())
10102     return;
10103 
10104   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10105 
10106   llvm::Value *NumTeamsVal =
10107       NumTeams
10108           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10109                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10110           : CGF.Builder.getInt32(0);
10111 
10112   llvm::Value *ThreadLimitVal =
10113       ThreadLimit
10114           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10115                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10116           : CGF.Builder.getInt32(0);
10117 
10118   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10119   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10120                                      ThreadLimitVal};
10121   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10122                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10123                       PushNumTeamsArgs);
10124 }
10125 
/// Emit the pair of runtime calls bracketing a 'target data' region:
/// __tgt_target_data_begin before the region body and __tgt_target_data_end
/// after it, honoring the 'if' and 'device' clauses. The region body
/// (\p CodeGen) is normally emitted once between the two calls; when device
/// pointer privatization is required it is duplicated inside the 'then' and
/// 'else' branches instead. \p Info carries the offloading arrays from the
/// begin call to the end call.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments. This populates Info, which
    // the closing callback (EndThenGen) reads back.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. Without a device clause, OMP_DEVICEID_UNDEF is
    // passed so the runtime selects the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region. Reuses the arrays
  // recorded in Info by BeginThenGen.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. NOTE: the device expression is re-evaluated here;
    // this mirrors the begin call above.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10254 
/// Emit the runtime call for a stand-alone target data directive:
/// 'target enter data', 'target exit data', or 'target update'. Selects the
/// matching __tgt_target_data_{begin,end,update}[_nowait] runtime entry,
/// honoring the 'if', 'device', 'nowait', and 'depend' clauses. When a depend
/// clause is present the call is emitted as a target task.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment. InputInfo and
  // MapTypesArray are captured by reference: they are filled in later by
  // TargetThenGen before this callback runs.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any. Without a device clause, OMP_DEVICEID_UNDEF is
    // passed so the runtime selects the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. The '_nowait' variants are used when a nowait clause is
    // present. Every other directive kind is listed explicitly so that adding
    // a new kind forces this switch to be revisited.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the map clauses and then emit ThenGen,
  // either inline or wrapped in a target task when a depend clause is present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to the captured-by-reference locals read by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // If the if clause evaluates to false, nothing is emitted at all.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10420 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification used when mangling the vector variant name; parameters
    /// default to Vector.
    ParamKindTy Kind = Vector;
    /// For Linear, the step value (omitted from mangling when equal to 1);
    /// for LinearWithVarStride, presumably the stride argument position —
    /// see the 's' mangling in the emitters below.
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; zero/empty when no alignment was given
    /// (the 'a' mangling suffix is only emitted when non-zero).
    llvm::APSInt Alignment;
  };
} // namespace
10431 
evaluateCDTSize(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10432 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10433                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10434   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10435   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10436   // of that clause. The VLEN value must be power of 2.
10437   // In other case the notion of the function`s "characteristic data type" (CDT)
10438   // is used to compute the vector length.
10439   // CDT is defined in the following order:
10440   //   a) For non-void function, the CDT is the return type.
10441   //   b) If the function has any non-uniform, non-linear parameters, then the
10442   //   CDT is the type of the first such parameter.
10443   //   c) If the CDT determined by a) or b) above is struct, union, or class
10444   //   type which is pass-by-value (except for the type that maps to the
10445   //   built-in complex data type), the characteristic data type is int.
10446   //   d) If none of the above three cases is applicable, the CDT is int.
10447   // The VLEN is then determined based on the CDT and the size of vector
10448   // register of that ISA for which current vector version is generated. The
10449   // VLEN is computed using the formula below:
10450   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10451   // where vector register size specified in section 3.2.1 Registers and the
10452   // Stack Frame of original AMD64 ABI document.
10453   QualType RetType = FD->getReturnType();
10454   if (RetType.isNull())
10455     return 0;
10456   ASTContext &C = FD->getASTContext();
10457   QualType CDT;
10458   if (!RetType.isNull() && !RetType->isVoidType()) {
10459     CDT = RetType;
10460   } else {
10461     unsigned Offset = 0;
10462     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10463       if (ParamAttrs[Offset].Kind == Vector)
10464         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10465       ++Offset;
10466     }
10467     if (CDT.isNull()) {
10468       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10469         if (ParamAttrs[I + Offset].Kind == Vector) {
10470           CDT = FD->getParamDecl(I)->getType();
10471           break;
10472         }
10473       }
10474     }
10475   }
10476   if (CDT.isNull())
10477     CDT = C.IntTy;
10478   CDT = CDT->getCanonicalTypeUnqualified();
10479   if (CDT->isRecordType() || CDT->isUnionType())
10480     CDT = C.IntTy;
10481   return C.getTypeSize(CDT);
10482 }
10483 
10484 static void
emitX86DeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn,const llvm::APSInt & VLENVal,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State)10485 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10486                            const llvm::APSInt &VLENVal,
10487                            ArrayRef<ParamAttrTy> ParamAttrs,
10488                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10489   struct ISADataTy {
10490     char ISA;
10491     unsigned VecRegSize;
10492   };
10493   ISADataTy ISAData[] = {
10494       {
10495           'b', 128
10496       }, // SSE
10497       {
10498           'c', 256
10499       }, // AVX
10500       {
10501           'd', 256
10502       }, // AVX2
10503       {
10504           'e', 512
10505       }, // AVX512
10506   };
10507   llvm::SmallVector<char, 2> Masked;
10508   switch (State) {
10509   case OMPDeclareSimdDeclAttr::BS_Undefined:
10510     Masked.push_back('N');
10511     Masked.push_back('M');
10512     break;
10513   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10514     Masked.push_back('N');
10515     break;
10516   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10517     Masked.push_back('M');
10518     break;
10519   }
10520   for (char Mask : Masked) {
10521     for (const ISADataTy &Data : ISAData) {
10522       SmallString<256> Buffer;
10523       llvm::raw_svector_ostream Out(Buffer);
10524       Out << "_ZGV" << Data.ISA << Mask;
10525       if (!VLENVal) {
10526         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10527         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10528         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10529       } else {
10530         Out << VLENVal;
10531       }
10532       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10533         switch (ParamAttr.Kind){
10534         case LinearWithVarStride:
10535           Out << 's' << ParamAttr.StrideOrArg;
10536           break;
10537         case Linear:
10538           Out << 'l';
10539           if (ParamAttr.StrideOrArg != 1)
10540             Out << ParamAttr.StrideOrArg;
10541           break;
10542         case Uniform:
10543           Out << 'u';
10544           break;
10545         case Vector:
10546           Out << 'v';
10547           break;
10548         }
10549         if (!!ParamAttr.Alignment)
10550           Out << 'a' << ParamAttr.Alignment;
10551       }
10552       Out << '_' << Fn->getName();
10553       Fn->addFnAttr(Out.str());
10554     }
10555   }
10556 }
10557 
10558 // This are the Functions that are needed to mangle the name of the
10559 // vector functions generated by the compiler, according to the rules
10560 // defined in the "Vector Function ABI specifications for AArch64",
10561 // available at
10562 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10563 
10564 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10565 ///
10566 /// TODO: Need to implement the behavior for reference marked with a
10567 /// var or no linear modifiers (1.b in the section). For this, we
10568 /// need to extend ParamKindTy to support the linear modifiers.
getAArch64MTV(QualType QT,ParamKindTy Kind)10569 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10570   QT = QT.getCanonicalType();
10571 
10572   if (QT->isVoidType())
10573     return false;
10574 
10575   if (Kind == ParamKindTy::Uniform)
10576     return false;
10577 
10578   if (Kind == ParamKindTy::Linear)
10579     return false;
10580 
10581   // TODO: Handle linear references with modifiers
10582 
10583   if (Kind == ParamKindTy::LinearWithVarStride)
10584     return false;
10585 
10586   return true;
10587 }
10588 
10589 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
getAArch64PBV(QualType QT,ASTContext & C)10590 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10591   QT = QT.getCanonicalType();
10592   unsigned Size = C.getTypeSize(QT);
10593 
10594   // Only scalars and complex within 16 bytes wide set PVB to true.
10595   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10596     return false;
10597 
10598   if (QT->isFloatingType())
10599     return true;
10600 
10601   if (QT->isIntegerType())
10602     return true;
10603 
10604   if (QT->isPointerType())
10605     return true;
10606 
10607   // TODO: Add support for complex types (section 3.1.2, item 2).
10608 
10609   return false;
10610 }
10611 
10612 /// Computes the lane size (LS) of a return type or of an input parameter,
10613 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10614 /// TODO: Add support for references, section 3.2.1, item 1.
getAArch64LS(QualType QT,ParamKindTy Kind,ASTContext & C)10615 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10616   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10617     QualType PTy = QT.getCanonicalType()->getPointeeType();
10618     if (getAArch64PBV(PTy, C))
10619       return C.getTypeSize(PTy);
10620   }
10621   if (getAArch64PBV(QT, C))
10622     return C.getTypeSize(QT);
10623 
10624   return C.getTypeSize(C.getUIntPtrType());
10625 }
10626 
10627 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10628 // signature of the scalar function, as defined in 3.2.2 of the
10629 // AAVFABI.
10630 static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10631 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10632   QualType RetType = FD->getReturnType().getCanonicalType();
10633 
10634   ASTContext &C = FD->getASTContext();
10635 
10636   bool OutputBecomesInput = false;
10637 
10638   llvm::SmallVector<unsigned, 8> Sizes;
10639   if (!RetType->isVoidType()) {
10640     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10641     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10642       OutputBecomesInput = true;
10643   }
10644   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10645     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10646     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10647   }
10648 
10649   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10650   // The LS of a function parameter / return value can only be a power
10651   // of 2, starting from 8 bits, up to 128.
10652   assert(std::all_of(Sizes.begin(), Sizes.end(),
10653                      [](unsigned Size) {
10654                        return Size == 8 || Size == 16 || Size == 32 ||
10655                               Size == 64 || Size == 128;
10656                      }) &&
10657          "Invalid size");
10658 
10659   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10660                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10661                          OutputBecomesInput);
10662 }
10663 
10664 /// Mangle the parameter part of the vector function name according to
10665 /// their OpenMP classification. The mangling function is defined in
10666 /// section 3.5 of the AAVFABI.
mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs)10667 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10668   SmallString<256> Buffer;
10669   llvm::raw_svector_ostream Out(Buffer);
10670   for (const auto &ParamAttr : ParamAttrs) {
10671     switch (ParamAttr.Kind) {
10672     case LinearWithVarStride:
10673       Out << "ls" << ParamAttr.StrideOrArg;
10674       break;
10675     case Linear:
10676       Out << 'l';
10677       // Don't print the step value if it is not present or if it is
10678       // equal to 1.
10679       if (ParamAttr.StrideOrArg != 1)
10680         Out << ParamAttr.StrideOrArg;
10681       break;
10682     case Uniform:
10683       Out << 'u';
10684       break;
10685     case Vector:
10686       Out << 'v';
10687       break;
10688     }
10689 
10690     if (!!ParamAttr.Alignment)
10691       Out << 'a' << ParamAttr.Alignment;
10692   }
10693 
10694   return std::string(Out.str());
10695 }
10696 
10697 // Function used to add the attribute. The parameter `VLEN` is
10698 // templated to allow the use of "x" when targeting scalable functions
10699 // for SVE.
10700 template <typename T>
addAArch64VectorName(T VLEN,StringRef LMask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10701 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10702                                  char ISA, StringRef ParSeq,
10703                                  StringRef MangledName, bool OutputBecomesInput,
10704                                  llvm::Function *Fn) {
10705   SmallString<256> Buffer;
10706   llvm::raw_svector_ostream Out(Buffer);
10707   Out << Prefix << ISA << LMask << VLEN;
10708   if (OutputBecomesInput)
10709     Out << "v";
10710   Out << ParSeq << "_" << MangledName;
10711   Fn->addFnAttr(Out.str());
10712 }
10713 
10714 // Helper function to generate the Advanced SIMD names depending on
10715 // the value of the NDS when simdlen is not present.
addAArch64AdvSIMDNDSNames(unsigned NDS,StringRef Mask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10716 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10717                                       StringRef Prefix, char ISA,
10718                                       StringRef ParSeq, StringRef MangledName,
10719                                       bool OutputBecomesInput,
10720                                       llvm::Function *Fn) {
10721   switch (NDS) {
10722   case 8:
10723     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10724                          OutputBecomesInput, Fn);
10725     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10726                          OutputBecomesInput, Fn);
10727     break;
10728   case 16:
10729     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10730                          OutputBecomesInput, Fn);
10731     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10732                          OutputBecomesInput, Fn);
10733     break;
10734   case 32:
10735     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10736                          OutputBecomesInput, Fn);
10737     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10738                          OutputBecomesInput, Fn);
10739     break;
10740   case 64:
10741   case 128:
10742     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10743                          OutputBecomesInput, Fn);
10744     break;
10745   default:
10746     llvm_unreachable("Scalar type is too wide.");
10747   }
10748 }
10749 
10750 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
emitAArch64DeclareSimdFunction(CodeGenModule & CGM,const FunctionDecl * FD,unsigned UserVLEN,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State,StringRef MangledName,char ISA,unsigned VecRegSize,llvm::Function * Fn,SourceLocation SLoc)10751 static void emitAArch64DeclareSimdFunction(
10752     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10753     ArrayRef<ParamAttrTy> ParamAttrs,
10754     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10755     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10756 
10757   // Get basic data for building the vector signature.
10758   const auto Data = getNDSWDS(FD, ParamAttrs);
10759   const unsigned NDS = std::get<0>(Data);
10760   const unsigned WDS = std::get<1>(Data);
10761   const bool OutputBecomesInput = std::get<2>(Data);
10762 
10763   // Check the values provided via `simdlen` by the user.
10764   // 1. A `simdlen(1)` doesn't produce vector signatures,
10765   if (UserVLEN == 1) {
10766     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10767         DiagnosticsEngine::Warning,
10768         "The clause simdlen(1) has no effect when targeting aarch64.");
10769     CGM.getDiags().Report(SLoc, DiagID);
10770     return;
10771   }
10772 
10773   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10774   // Advanced SIMD output.
10775   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10776     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10777         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10778                                     "power of 2 when targeting Advanced SIMD.");
10779     CGM.getDiags().Report(SLoc, DiagID);
10780     return;
10781   }
10782 
10783   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10784   // limits.
10785   if (ISA == 's' && UserVLEN != 0) {
10786     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10787       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10788           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10789                                       "lanes in the architectural constraints "
10790                                       "for SVE (min is 128-bit, max is "
10791                                       "2048-bit, by steps of 128-bit)");
10792       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10793       return;
10794     }
10795   }
10796 
10797   // Sort out parameter sequence.
10798   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10799   StringRef Prefix = "_ZGV";
10800   // Generate simdlen from user input (if any).
10801   if (UserVLEN) {
10802     if (ISA == 's') {
10803       // SVE generates only a masked function.
10804       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10805                            OutputBecomesInput, Fn);
10806     } else {
10807       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10808       // Advanced SIMD generates one or two functions, depending on
10809       // the `[not]inbranch` clause.
10810       switch (State) {
10811       case OMPDeclareSimdDeclAttr::BS_Undefined:
10812         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10813                              OutputBecomesInput, Fn);
10814         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10815                              OutputBecomesInput, Fn);
10816         break;
10817       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10818         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10819                              OutputBecomesInput, Fn);
10820         break;
10821       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10822         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10823                              OutputBecomesInput, Fn);
10824         break;
10825       }
10826     }
10827   } else {
10828     // If no user simdlen is provided, follow the AAVFABI rules for
10829     // generating the vector length.
10830     if (ISA == 's') {
10831       // SVE, section 3.4.1, item 1.
10832       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10833                            OutputBecomesInput, Fn);
10834     } else {
10835       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10836       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10837       // two vector names depending on the use of the clause
10838       // `[not]inbranch`.
10839       switch (State) {
10840       case OMPDeclareSimdDeclAttr::BS_Undefined:
10841         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10842                                   OutputBecomesInput, Fn);
10843         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10844                                   OutputBecomesInput, Fn);
10845         break;
10846       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10847         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10848                                   OutputBecomesInput, Fn);
10849         break;
10850       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10851         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10852                                   OutputBecomesInput, Fn);
10853         break;
10854       }
10855     }
10856   }
10857 }
10858 
emitDeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn)10859 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10860                                               llvm::Function *Fn) {
10861   ASTContext &C = CGM.getContext();
10862   FD = FD->getMostRecentDecl();
10863   // Map params to their positions in function decl.
10864   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10865   if (isa<CXXMethodDecl>(FD))
10866     ParamPositions.try_emplace(FD, 0);
10867   unsigned ParamPos = ParamPositions.size();
10868   for (const ParmVarDecl *P : FD->parameters()) {
10869     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10870     ++ParamPos;
10871   }
10872   while (FD) {
10873     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10874       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10875       // Mark uniform parameters.
10876       for (const Expr *E : Attr->uniforms()) {
10877         E = E->IgnoreParenImpCasts();
10878         unsigned Pos;
10879         if (isa<CXXThisExpr>(E)) {
10880           Pos = ParamPositions[FD];
10881         } else {
10882           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10883                                 ->getCanonicalDecl();
10884           Pos = ParamPositions[PVD];
10885         }
10886         ParamAttrs[Pos].Kind = Uniform;
10887       }
10888       // Get alignment info.
10889       auto NI = Attr->alignments_begin();
10890       for (const Expr *E : Attr->aligneds()) {
10891         E = E->IgnoreParenImpCasts();
10892         unsigned Pos;
10893         QualType ParmTy;
10894         if (isa<CXXThisExpr>(E)) {
10895           Pos = ParamPositions[FD];
10896           ParmTy = E->getType();
10897         } else {
10898           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10899                                 ->getCanonicalDecl();
10900           Pos = ParamPositions[PVD];
10901           ParmTy = PVD->getType();
10902         }
10903         ParamAttrs[Pos].Alignment =
10904             (*NI)
10905                 ? (*NI)->EvaluateKnownConstInt(C)
10906                 : llvm::APSInt::getUnsigned(
10907                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10908                           .getQuantity());
10909         ++NI;
10910       }
10911       // Mark linear parameters.
10912       auto SI = Attr->steps_begin();
10913       auto MI = Attr->modifiers_begin();
10914       for (const Expr *E : Attr->linears()) {
10915         E = E->IgnoreParenImpCasts();
10916         unsigned Pos;
10917         // Rescaling factor needed to compute the linear parameter
10918         // value in the mangled name.
10919         unsigned PtrRescalingFactor = 1;
10920         if (isa<CXXThisExpr>(E)) {
10921           Pos = ParamPositions[FD];
10922         } else {
10923           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10924                                 ->getCanonicalDecl();
10925           Pos = ParamPositions[PVD];
10926           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10927             PtrRescalingFactor = CGM.getContext()
10928                                      .getTypeSizeInChars(P->getPointeeType())
10929                                      .getQuantity();
10930         }
10931         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10932         ParamAttr.Kind = Linear;
10933         // Assuming a stride of 1, for `linear` without modifiers.
10934         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10935         if (*SI) {
10936           Expr::EvalResult Result;
10937           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10938             if (const auto *DRE =
10939                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10940               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10941                 ParamAttr.Kind = LinearWithVarStride;
10942                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10943                     ParamPositions[StridePVD->getCanonicalDecl()]);
10944               }
10945             }
10946           } else {
10947             ParamAttr.StrideOrArg = Result.Val.getInt();
10948           }
10949         }
10950         // If we are using a linear clause on a pointer, we need to
10951         // rescale the value of linear_step with the byte size of the
10952         // pointee type.
10953         if (Linear == ParamAttr.Kind)
10954           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10955         ++SI;
10956         ++MI;
10957       }
10958       llvm::APSInt VLENVal;
10959       SourceLocation ExprLoc;
10960       const Expr *VLENExpr = Attr->getSimdlen();
10961       if (VLENExpr) {
10962         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10963         ExprLoc = VLENExpr->getExprLoc();
10964       }
10965       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10966       if (CGM.getTriple().isX86()) {
10967         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10968       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10969         unsigned VLEN = VLENVal.getExtValue();
10970         StringRef MangledName = Fn->getName();
10971         if (CGM.getTarget().hasFeature("sve"))
10972           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10973                                          MangledName, 's', 128, Fn, ExprLoc);
10974         if (CGM.getTarget().hasFeature("neon"))
10975           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10976                                          MangledName, 'n', 128, Fn, ExprLoc);
10977       }
10978     }
10979     FD = FD->getPreviousDecl();
10980   }
10981 }
10982 
10983 namespace {
10984 /// Cleanup action for doacross support.
10985 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10986 public:
10987   static const int DoacrossFinArgs = 2;
10988 
10989 private:
10990   llvm::FunctionCallee RTLFn;
10991   llvm::Value *Args[DoacrossFinArgs];
10992 
10993 public:
DoacrossCleanupTy(llvm::FunctionCallee RTLFn,ArrayRef<llvm::Value * > CallArgs)10994   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10995                     ArrayRef<llvm::Value *> CallArgs)
10996       : RTLFn(RTLFn) {
10997     assert(CallArgs.size() == DoacrossFinArgs);
10998     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10999   }
Emit(CodeGenFunction & CGF,Flags)11000   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11001     if (!CGF.HaveInsertPoint())
11002       return;
11003     CGF.EmitRuntimeCall(RTLFn, Args);
11004   }
11005 };
11006 } // namespace
11007 
/// Emit initialization for a doacross loop nest: build a temporary array of
/// kmp_dim descriptors (one per item of \p NumIterations), call
/// __kmpc_doacross_init with it, and push a cleanup that calls
/// __kmpc_doacross_fini when the region is left.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (and cache in KmpDimTy) the implicit record matching the runtime's
  // per-dimension descriptor.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero-init the whole array; the 'lo' field of each dimension is left at 0.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Widen/convert the iteration count to kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini(loc, gtid) call to run when
  // the region is exited, on both the normal and the exceptional path.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11078 
/// Emit a doacross 'ordered' construct with a depend clause: pack the
/// per-loop dependence data into a temporary kmp_int64 array and call
/// __kmpc_doacross_post for depend(source) or __kmpc_doacross_wait for
/// depend(sink).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop's counter value, converted to kmp_int64.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Runtime arguments: ident_t *loc, kmp_int32 gtid, kmp_int64 *vec.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
11111 
emitCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::FunctionCallee Callee,ArrayRef<llvm::Value * > Args) const11112 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11113                                llvm::FunctionCallee Callee,
11114                                ArrayRef<llvm::Value *> Args) const {
11115   assert(Loc.isValid() && "Outlined function call location must be valid.");
11116   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11117 
11118   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11119     if (Fn->doesNotThrow()) {
11120       CGF.EmitNounwindRuntimeCall(Fn, Args);
11121       return;
11122     }
11123   }
11124   CGF.EmitRuntimeCall(Callee, Args);
11125 }
11126 
/// Emit a call to an outlined OpenMP region function. Forwards directly to
/// emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11132 
emitFunctionProlog(CodeGenFunction & CGF,const Decl * D)11133 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11134   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11135     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11136       HasEmittedDeclareTargetRegion = true;
11137 }
11138 
/// Return the address to use for a parameter inside the outlined region. The
/// base implementation ignores \p TargetParam and returns the native
/// parameter's local address (subclasses presumably handle cases where the
/// native and target representations differ).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11144 
11145 namespace {
11146 /// Cleanup action for allocate support.
11147 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11148 public:
11149   static const int CleanupArgs = 3;
11150 
11151 private:
11152   llvm::FunctionCallee RTLFn;
11153   llvm::Value *Args[CleanupArgs];
11154 
11155 public:
OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,ArrayRef<llvm::Value * > CallArgs)11156   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11157                        ArrayRef<llvm::Value *> CallArgs)
11158       : RTLFn(RTLFn) {
11159     assert(CallArgs.size() == CleanupArgs &&
11160            "Size of arguments does not match.");
11161     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11162   }
Emit(CodeGenFunction & CGF,Flags)11163   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11164     if (!CGF.HaveInsertPoint())
11165       return;
11166     CGF.EmitRuntimeCall(RTLFn, Args);
11167   }
11168 };
11169 } // namespace
11170 
/// For a local variable carrying the 'omp allocate' attribute, allocate its
/// storage through __kmpc_alloc with the requested allocator, register a
/// matching __kmpc_free cleanup, and return the address cast to the
/// variable's type. Returns Address::invalid() when the default (stack)
/// allocation should be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is only known at run time, so round it up to the
    // declared alignment with IR arithmetic.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round up to the alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Free the allocation with the same thread id and allocator at region exit
  // (normal and EH paths).
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* returned by the runtime to a pointer to the variable's
  // type before handing it back as an Address.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11227 
/// If the loop directive has any 'nontemporal' clauses, push the set of
/// declarations they reference onto the runtime's NontemporalDeclsStack for
/// the lifetime of this RAII object; otherwise push nothing.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Non-DeclRefExpr references must be members of the current class
        // (implicit or explicit 'this').
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11253 
~NontemporalDeclsRAII()11254 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11255   if (!NeedToPush)
11256     return;
11257   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11258 }
11259 
isNontemporalDecl(const ValueDecl * VD) const11260 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11261   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11262 
11263   return llvm::any_of(
11264       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11265       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11266 }
11267 
tryToDisableInnerAnalysis(const OMPExecutableDirective & S,llvm::DenseSet<CanonicalDeclPtr<const Decl>> & NeedToAddForLPCsAsDisabled) const11268 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11269     const OMPExecutableDirective &S,
11270     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11271     const {
11272   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11273   // Vars in target/task regions must be excluded completely.
11274   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11275       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11276     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11277     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11278     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11279     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11280       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11281         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11282     }
11283   }
11284   // Exclude vars in private clauses.
11285   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11286     for (const Expr *Ref : C->varlists()) {
11287       if (!Ref->getType()->isScalarType())
11288         continue;
11289       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11290       if (!DRE)
11291         continue;
11292       NeedToCheckForLPCs.insert(DRE->getDecl());
11293     }
11294   }
11295   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11296     for (const Expr *Ref : C->varlists()) {
11297       if (!Ref->getType()->isScalarType())
11298         continue;
11299       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11300       if (!DRE)
11301         continue;
11302       NeedToCheckForLPCs.insert(DRE->getDecl());
11303     }
11304   }
11305   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11306     for (const Expr *Ref : C->varlists()) {
11307       if (!Ref->getType()->isScalarType())
11308         continue;
11309       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11310       if (!DRE)
11311         continue;
11312       NeedToCheckForLPCs.insert(DRE->getDecl());
11313     }
11314   }
11315   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11316     for (const Expr *Ref : C->varlists()) {
11317       if (!Ref->getType()->isScalarType())
11318         continue;
11319       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11320       if (!DRE)
11321         continue;
11322       NeedToCheckForLPCs.insert(DRE->getDecl());
11323     }
11324   }
11325   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11326     for (const Expr *Ref : C->varlists()) {
11327       if (!Ref->getType()->isScalarType())
11328         continue;
11329       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11330       if (!DRE)
11331         continue;
11332       NeedToCheckForLPCs.insert(DRE->getDecl());
11333     }
11334   }
11335   for (const Decl *VD : NeedToCheckForLPCs) {
11336     for (const LastprivateConditionalData &Data :
11337          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11338       if (Data.DeclToUniqueName.count(VD) > 0) {
11339         if (!Data.Disabled)
11340           NeedToAddForLPCsAsDisabled.insert(VD);
11341         break;
11342       }
11343     }
11344   }
11345 }
11346 
/// Push a lastprivate-conditional region for \p S when the OpenMP version is
/// at least 5.0 and the directive has a lastprivate(conditional:) clause;
/// otherwise record that nothing was pushed. Each conditional variable is
/// mapped to a unique "pl_cond" name, and the iteration-variable lvalue and
/// current function are stored alongside.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional-lastprivate variable to a unique name used by the
    // rest of the lastprivate-conditional machinery.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11378 
/// For OpenMP >= 5.0, push a "disabled" lastprivate-conditional entry for the
/// variables of \p S that are already tracked by an enclosing
/// lastprivate-conditional region (see tryToDisableInnerAnalysis), so the
/// analysis skips them inside this construct. Pushes nothing when there are
/// no such variables.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Disabled entries carry empty unique names; only membership matters.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11397 
/// Named factory for the two-argument ("disable") constructor above.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11403 
~LastprivateConditionalRAII()11404 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11405   if (CGM.getLangOpts().OpenMP < 50)
11406     return;
11407   if (Action == ActionToDo::DisableLastprivateConditional) {
11408     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11409            "Expected list of disabled private vars.");
11410     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11411   }
11412   if (Action == ActionToDo::PushAsLastprivateConditional) {
11413     assert(
11414         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11415         "Expected list of lastprivate conditional vars.");
11416     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11417   }
11418 }
11419 
/// Create (or reuse) the per-function temporary that backs a
/// lastprivate(conditional:) variable: an implicit record holding the
/// variable's value plus a char "fired" flag. Resets the flag to 0 and
/// returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache: VD -> (record type, value field, fired field, base).
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the record and allocate the
    // backing temporary.
    // NOTE(review): "lasprivate.conditional" looks like a typo for
    // "lastprivate.conditional"; it is only an internal record tag, so
    // renaming is deliberately avoided here.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the "fired" flag to 0 on (re-)initialization.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11454 
11455 namespace {
11456 /// Checks if the lastprivate conditional variable is referenced in LHS.
11457 class LastprivateConditionalRefChecker final
11458     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11459   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11460   const Expr *FoundE = nullptr;
11461   const Decl *FoundD = nullptr;
11462   StringRef UniqueDeclName;
11463   LValue IVLVal;
11464   llvm::Function *FoundFn = nullptr;
11465   SourceLocation Loc;
11466 
11467 public:
VisitDeclRefExpr(const DeclRefExpr * E)11468   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11469     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11470          llvm::reverse(LPM)) {
11471       auto It = D.DeclToUniqueName.find(E->getDecl());
11472       if (It == D.DeclToUniqueName.end())
11473         continue;
11474       if (D.Disabled)
11475         return false;
11476       FoundE = E;
11477       FoundD = E->getDecl()->getCanonicalDecl();
11478       UniqueDeclName = It->second;
11479       IVLVal = D.IVLVal;
11480       FoundFn = D.Fn;
11481       break;
11482     }
11483     return FoundE == E;
11484   }
VisitMemberExpr(const MemberExpr * E)11485   bool VisitMemberExpr(const MemberExpr *E) {
11486     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11487       return false;
11488     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11489          llvm::reverse(LPM)) {
11490       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11491       if (It == D.DeclToUniqueName.end())
11492         continue;
11493       if (D.Disabled)
11494         return false;
11495       FoundE = E;
11496       FoundD = E->getMemberDecl()->getCanonicalDecl();
11497       UniqueDeclName = It->second;
11498       IVLVal = D.IVLVal;
11499       FoundFn = D.Fn;
11500       break;
11501     }
11502     return FoundE == E;
11503   }
VisitStmt(const Stmt * S)11504   bool VisitStmt(const Stmt *S) {
11505     for (const Stmt *Child : S->children()) {
11506       if (!Child)
11507         continue;
11508       if (const auto *E = dyn_cast<Expr>(Child))
11509         if (!E->isGLValue())
11510           continue;
11511       if (Visit(Child))
11512         return true;
11513     }
11514     return false;
11515   }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)11516   explicit LastprivateConditionalRefChecker(
11517       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11518       : LPM(LPM) {}
11519   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const11520   getFoundData() const {
11521     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11522   }
11523 };
11524 } // namespace
11525 
/// Emits the "latest iteration wins" update for a lastprivate conditional:
/// compares the current iteration counter against the last globally recorded
/// one and, when not smaller, stores both the counter and the private value
/// into internal global copies. The compare-and-store is wrapped in a named
/// critical region, except in simd-only mode where no parallel region can
/// exist and the guard is unnecessary.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Signedness of the compare follows the loop
    // iteration variable's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11612 
checkAndEmitLastprivateConditional(CodeGenFunction & CGF,const Expr * LHS)11613 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11614                                                          const Expr *LHS) {
11615   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11616     return;
11617   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11618   if (!Checker.Visit(LHS))
11619     return;
11620   const Expr *FoundE;
11621   const Decl *FoundD;
11622   StringRef UniqueDeclName;
11623   LValue IVLVal;
11624   llvm::Function *FoundFn;
11625   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11626       Checker.getFoundData();
11627   if (FoundFn != CGF.CurFn) {
11628     // Special codegen for inner parallel regions.
11629     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11630     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11631     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11632            "Lastprivate conditional is not found in outer region.");
11633     QualType StructTy = std::get<0>(It->getSecond());
11634     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11635     LValue PrivLVal = CGF.EmitLValue(FoundE);
11636     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11637         PrivLVal.getAddress(CGF),
11638         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11639     LValue BaseLVal =
11640         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11641     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11642     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11643                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11644                         FiredLVal, llvm::AtomicOrdering::Unordered,
11645                         /*IsVolatile=*/true, /*isInit=*/false);
11646     return;
11647   }
11648 
11649   // Private address of the lastprivate conditional in the current context.
11650   // priv_a
11651   LValue LVal = CGF.EmitLValue(FoundE);
11652   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11653                                    FoundE->getExprLoc());
11654 }
11655 
checkAndEmitSharedLastprivateConditional(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> & IgnoredDecls)11656 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11657     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11658     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11659   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11660     return;
11661   auto Range = llvm::reverse(LastprivateConditionalStack);
11662   auto It = llvm::find_if(
11663       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11664   if (It == Range.end() || It->Fn != CGF.CurFn)
11665     return;
11666   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11667   assert(LPCI != LastprivateConditionalToTypes.end() &&
11668          "Lastprivates must be registered already.");
11669   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11670   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11671   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11672   for (const auto &Pair : It->DeclToUniqueName) {
11673     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11674     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11675       continue;
11676     auto I = LPCI->getSecond().find(Pair.first);
11677     assert(I != LPCI->getSecond().end() &&
11678            "Lastprivate must be rehistered already.");
11679     // bool Cmp = priv_a.Fired != 0;
11680     LValue BaseLVal = std::get<3>(I->getSecond());
11681     LValue FiredLVal =
11682         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11683     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11684     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11685     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11686     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11687     // if (Cmp) {
11688     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11689     CGF.EmitBlock(ThenBB);
11690     Address Addr = CGF.GetAddrOfLocalVar(VD);
11691     LValue LVal;
11692     if (VD->getType()->isReferenceType())
11693       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11694                                            AlignmentSource::Decl);
11695     else
11696       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11697                                 AlignmentSource::Decl);
11698     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11699                                      D.getBeginLoc());
11700     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11701     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11702     // }
11703   }
11704 }
11705 
emitLastprivateConditionalFinalUpdate(CodeGenFunction & CGF,LValue PrivLVal,const VarDecl * VD,SourceLocation Loc)11706 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11707     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11708     SourceLocation Loc) {
11709   if (CGF.getLangOpts().OpenMP < 50)
11710     return;
11711   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11712   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11713          "Unknown lastprivate conditional variable.");
11714   StringRef UniqueName = It->second;
11715   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11716   // The variable was not updated in the region - exit.
11717   if (!GV)
11718     return;
11719   LValue LPLVal = CGF.MakeAddrLValue(
11720       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11721   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11722   CGF.EmitStoreOfScalar(Res, PrivLVal);
11723 }
11724 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime stubs. In SIMD-only mode no OpenMP runtime calls may be
// emitted, so every entry point requiring the runtime library aborts; Sema is
// expected to reject the corresponding constructs before codegen runs.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11752 
// SIMD-only mode stubs: synchronization regions (critical, master, taskyield,
// taskgroup, single, ordered, barrier) all require the OpenMP runtime.

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11799 
// SIMD-only mode stubs: worksharing-loop scheduling (dispatch/static init,
// ordered iteration end, static finish, dynamic next) needs the runtime.

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11839 
// SIMD-only mode stubs: thread-count/proc-bind clauses, threadprivate
// storage, flushes, and task emission all require the OpenMP runtime.

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11892 
/// Reductions in SIMD-only mode: only the "simple" (runtime-free) lowering is
/// legal here, and it is shared with the base implementation, so this is not
/// a stub — it delegates to CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11901 
// SIMD-only mode stubs: task reductions and taskwait require the runtime.

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11932 
// SIMD-only mode stubs: cancellation and target offloading require the
// runtime.

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11969 
/// Not a stub, unlike the sibling target hooks: returns false to report "this
/// global needs no target codegen" rather than aborting.
/// NOTE(review): presumably called unconditionally during global emission,
/// which is why it cannot be llvm_unreachable — confirm against callers.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11973 
// SIMD-only mode stubs: teams constructs, target data mapping, doacross
// dependences and parameter translation all require the runtime.

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12024