1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
50 public:
51   /// Kinds of OpenMP regions used in codegen.
52   enum CGOpenMPRegionKind {
53     /// Region with outlined function for standalone 'parallel'
54     /// directive.
55     ParallelOutlinedRegion,
56     /// Region with outlined function for standalone 'task' directive.
57     TaskOutlinedRegion,
58     /// Region for constructs that do not require function outlining,
59     /// like 'for', 'sections', 'atomic' etc. directives.
60     InlinedRegion,
61     /// Region with outlined function for standalone 'target' directive.
62     TargetRegion,
63   };
64 
CGOpenMPRegionInfo(const CapturedStmt & CS,const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)65   CGOpenMPRegionInfo(const CapturedStmt &CS,
66                      const CGOpenMPRegionKind RegionKind,
67                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68                      bool HasCancel)
69       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
70         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
71 
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)72   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
73                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
74                      bool HasCancel)
75       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
76         Kind(Kind), HasCancel(HasCancel) {}
77 
78   /// Get a variable or parameter for storing global thread id
79   /// inside OpenMP construct.
80   virtual const VarDecl *getThreadIDVariable() const = 0;
81 
82   /// Emit the captured statement body.
83   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
84 
85   /// Get an LValue for the current ThreadID variable.
86   /// \return LValue for thread id variable. This LValue always has type int32*.
87   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
88 
emitUntiedSwitch(CodeGenFunction &)89   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
90 
getRegionKind() const91   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
92 
getDirectiveKind() const93   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
94 
hasCancel() const95   bool hasCancel() const { return HasCancel; }
96 
classof(const CGCapturedStmtInfo * Info)97   static bool classof(const CGCapturedStmtInfo *Info) {
98     return Info->getKind() == CR_OpenMP;
99   }
100 
101   ~CGOpenMPRegionInfo() override = default;
102 
103 protected:
104   CGOpenMPRegionKind RegionKind;
105   RegionCodeGenTy CodeGen;
106   OpenMPDirectiveKind Kind;
107   bool HasCancel;
108 };
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,StringRef HelperName)113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
getThreadIDVariable() const125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
getHelperName() const128   StringRef getHelperName() const override { return HelperName; }
129 
classof(const CGCapturedStmtInfo * Info)130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
143 /// API for captured statement code generation in OpenMP constructs.
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
145 public:
146   class UntiedTaskActionTy final : public PrePostActionTy {
147     bool Untied;
148     const VarDecl *PartIDVar;
149     const RegionCodeGenTy UntiedCodeGen;
150     llvm::SwitchInst *UntiedSwitch = nullptr;
151 
152   public:
UntiedTaskActionTy(bool Tied,const VarDecl * PartIDVar,const RegionCodeGenTy & UntiedCodeGen)153     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
154                        const RegionCodeGenTy &UntiedCodeGen)
155         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
Enter(CodeGenFunction & CGF)156     void Enter(CodeGenFunction &CGF) override {
157       if (Untied) {
158         // Emit task switching point.
159         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         llvm::Value *Res =
163             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
164         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
165         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
166         CGF.EmitBlock(DoneBB);
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
170                               CGF.Builder.GetInsertBlock());
171         emitUntiedSwitch(CGF);
172       }
173     }
emitUntiedSwitch(CodeGenFunction & CGF) const174     void emitUntiedSwitch(CodeGenFunction &CGF) const {
175       if (Untied) {
176         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
177             CGF.GetAddrOfLocalVar(PartIDVar),
178             PartIDVar->getType()->castAs<PointerType>());
179         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180                               PartIdLVal);
181         UntiedCodeGen(CGF);
182         CodeGenFunction::JumpDest CurPoint =
183             CGF.getJumpDestInCurrentScope(".untied.next.");
184         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
185         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
186         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
187                               CGF.Builder.GetInsertBlock());
188         CGF.EmitBranchThroughCleanup(CurPoint);
189         CGF.EmitBlock(CurPoint.getBlock());
190       }
191     }
getNumberOfParts() const192     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
193   };
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt & CS,const VarDecl * ThreadIDVar,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,const UntiedTaskActionTy & Action)194   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
195                                  const VarDecl *ThreadIDVar,
196                                  const RegionCodeGenTy &CodeGen,
197                                  OpenMPDirectiveKind Kind, bool HasCancel,
198                                  const UntiedTaskActionTy &Action)
199       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
200         ThreadIDVar(ThreadIDVar), Action(Action) {
201     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
202   }
203 
204   /// Get a variable or parameter for storing global thread id
205   /// inside OpenMP construct.
getThreadIDVariable() const206   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
207 
208   /// Get an LValue for the current ThreadID variable.
209   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
210 
211   /// Get the name of the capture helper.
getHelperName() const212   StringRef getHelperName() const override { return ".omp_outlined."; }
213 
emitUntiedSwitch(CodeGenFunction & CGF)214   void emitUntiedSwitch(CodeGenFunction &CGF) override {
215     Action.emitUntiedSwitch(CGF);
216   }
217 
classof(const CGCapturedStmtInfo * Info)218   static bool classof(const CGCapturedStmtInfo *Info) {
219     return CGOpenMPRegionInfo::classof(Info) &&
220            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
221                TaskOutlinedRegion;
222   }
223 
224 private:
225   /// A variable or parameter storing global thread id for OpenMP
226   /// constructs.
227   const VarDecl *ThreadIDVar;
228   /// Action for emitting code for untied tasks.
229   const UntiedTaskActionTy &Action;
230 };
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo * OldCSI,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel)236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
getContextValue() const244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
setContextValue(llvm::Value * V)250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
getThisFieldDecl() const267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
getThreadIDVariable() const275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
getThreadIDVariableLValue(CodeGenFunction & CGF)282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
getHelperName() const289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
emitUntiedSwitch(CodeGenFunction & CGF)295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
getOldCSI() const300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
classof(const CGCapturedStmtInfo * Info)302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
CGOpenMPTargetRegionInfo(const CapturedStmt & CS,const RegionCodeGenTy & CodeGen,StringRef HelperName)322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
getThreadIDVariable() const330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
getHelperName() const333   StringRef getHelperName() const override { return HelperName; }
334 
classof(const CGCapturedStmtInfo * Info)335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
EmptyCodeGen(CodeGenFunction &,PrePostActionTy &)344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
345   llvm_unreachable("No codegen for expressions");
346 }
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
CGOpenMPInnerExprInfo(CodeGenFunction & CGF,const CapturedStmt & CS)351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
lookup(const VarDecl * VD) const378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
EmitBody(CodeGenFunction & CGF,const Stmt * S)385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
getThreadIDVariable() const391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
getHelperName() const396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
classof(const CGCapturedStmtInfo * Info)400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413   bool NoInheritance = false;
414 
415 public:
416   /// Constructs region for combined constructs.
417   /// \param CodeGen Code generation sequence for combined directives. Includes
418   /// a list of functions used for code generation of implicitly inlined
419   /// regions.
InlinedOpenMPRegionRAII(CodeGenFunction & CGF,const RegionCodeGenTy & CodeGen,OpenMPDirectiveKind Kind,bool HasCancel,bool NoInheritance=true)420   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
421                           OpenMPDirectiveKind Kind, bool HasCancel,
422                           bool NoInheritance = true)
423       : CGF(CGF), NoInheritance(NoInheritance) {
424     // Start emission for the construct.
425     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
426         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
427     if (NoInheritance) {
428       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
429       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
430       CGF.LambdaThisCaptureField = nullptr;
431       BlockInfo = CGF.BlockInfo;
432       CGF.BlockInfo = nullptr;
433     }
434   }
435 
~InlinedOpenMPRegionRAII()436   ~InlinedOpenMPRegionRAII() {
437     // Restore original CapturedStmtInfo only if we're done with code emission.
438     auto *OldCSI =
439         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
440     delete CGF.CapturedStmtInfo;
441     CGF.CapturedStmtInfo = OldCSI;
442     if (NoInheritance) {
443       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
444       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
445       CGF.BlockInfo = BlockInfo;
446     }
447   }
448 };
449 
450 /// Values for bit flags used in the ident_t to describe the fields.
451 /// All enumeric elements are named and described in accordance with the code
452 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
453 enum OpenMPLocationFlags : unsigned {
454   /// Use trampoline for internal microtask.
455   OMP_IDENT_IMD = 0x01,
456   /// Use c-style ident structure.
457   OMP_IDENT_KMPC = 0x02,
458   /// Atomic reduction option for kmpc_reduce.
459   OMP_ATOMIC_REDUCE = 0x10,
460   /// Explicit 'barrier' directive.
461   OMP_IDENT_BARRIER_EXPL = 0x20,
462   /// Implicit barrier in code.
463   OMP_IDENT_BARRIER_IMPL = 0x40,
464   /// Implicit barrier in 'for' directive.
465   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
466   /// Implicit barrier in 'sections' directive.
467   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
468   /// Implicit barrier in 'single' directive.
469   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
470   /// Call of __kmp_for_static_init for static loop.
471   OMP_IDENT_WORK_LOOP = 0x200,
472   /// Call of __kmp_for_static_init for sections.
473   OMP_IDENT_WORK_SECTIONS = 0x400,
474   /// Call of __kmp_for_static_init for distribute.
475   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
477 };
478 
479 namespace {
480 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
481 /// Values for bit flags for marking which requires clauses have been used.
482 enum OpenMPOffloadingRequiresDirFlags : int64_t {
483   /// flag undefined.
484   OMP_REQ_UNDEFINED               = 0x000,
485   /// no requires clause present.
486   OMP_REQ_NONE                    = 0x001,
487   /// reverse_offload clause.
488   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
489   /// unified_address clause.
490   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
491   /// unified_shared_memory clause.
492   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
493   /// dynamic_allocators clause.
494   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
495   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
496 };
497 
498 enum OpenMPOffloadingReservedDeviceIDs {
499   /// Device ID if the device was not defined, runtime should get it
500   /// from environment variables in the spec.
501   OMP_DEVICEID_UNDEF = -1,
502 };
503 } // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops. These enumerators mirror the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
CleanupTy(PrePostActionTy * Action)584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
Emit(CodeGenFunction & CGF,Flags)585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
operator ()(CodeGenFunction & CGF) const594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595   CodeGenFunction::RunCleanupsScope Scope(CGF);
596   if (PrePostAction) {
597     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598     Callback(CodeGen, CGF, *PrePostAction);
599   } else {
600     PrePostActionTy Action;
601     Callback(CodeGen, CGF, Action);
602   }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
getReductionInit(const Expr * ReductionOp)608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
emitInitWithReductionInitializer(CodeGenFunction & CGF,const OMPDeclareReductionDecl * DRD,const Expr * InitOp,Address Private,Address Original,QualType Ty)618 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
619                                              const OMPDeclareReductionDecl *DRD,
620                                              const Expr *InitOp,
621                                              Address Private, Address Original,
622                                              QualType Ty) {
623   if (DRD->getInitializer()) {
624     std::pair<llvm::Function *, llvm::Function *> Reduction =
625         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
626     const auto *CE = cast<CallExpr>(InitOp);
627     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
628     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
629     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
630     const auto *LHSDRE =
631         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
632     const auto *RHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
634     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
635     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
636                             [=]() { return Private; });
637     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
638                             [=]() { return Original; });
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
/// Emit element-by-element initialization of an array of non-trivial element
/// type, as needed for array-typed reduction privates.
/// \param CGF Current codegen function.
/// \param DestAddr Address of the (private) array being initialized.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized with
/// the user-defined 'declare reduction' initializer; otherwise with \p Init
/// as a plain expression.
/// \param Init Initialization expression applied to each element.
/// \param DRD User-defined reduction declaration, if any. When non-null the
/// original (shared) array is walked in lockstep so the initializer can refer
/// to the original element (omp_orig).
/// \param SrcAddr Address of the original array; only inspected when \p DRD
/// is non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current element of the destination (and, for UDRs, the
  // source) array across loop iterations; the second incoming value is added
  // after the increment below.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope cleanups for any temporaries created while emitting one element's
    // initializer.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" but this advances the
    // *source* pointer — cosmetic only, the GEP itself is correct.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
emitSharedLValue(CodeGenFunction & CGF,const Expr * E)766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
emitSharedLValueUB(CodeGenFunction & CGF,const Expr * E)770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
emitAggregateInitialization(CodeGenFunction & CGF,unsigned N,Address PrivateAddr,LValue SharedLVal,const OMPDeclareReductionDecl * DRD)777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
ReductionCodeGen(ArrayRef<const Expr * > Shareds,ArrayRef<const Expr * > Origs,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > ReductionOps)794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
emitSharedOrigLValue(CodeGenFunction & CGF,unsigned N)813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Compute and record the size (in chars, and in elements for variably
/// modified types) of reduction item N, and emit the variably modified type
/// so later address computations can use it.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the size is known from the type alone; no element
    // count is needed (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type of the original item, taken from the (typed) pointer to its
  // storage.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Size = (UB - LB) + 1 elements; byte size follows by multiplying with
    // sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: byte size comes from the type; element count is derived by
    // dividing by sizeof(element) (exact by construction).
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while emitting
  // the variably modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
/// Re-emit the variably modified type of reduction item N using an externally
/// supplied element count \p Size (e.g. one reloaded in a different function).
/// For non-variably-modified items \p Size must be null and nothing is done.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
883 
/// Emit initialization of the private copy of reduction item N.
/// Chooses between (a) aggregate element-wise init for array types,
/// (b) a user-defined 'declare reduction' initializer, and (c) the private
/// declaration's own initializer, invoking \p DefaultInit as required to set
/// up any default state first.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Make sure both addresses are typed as the item's memory type before any
  // loads/stores are emitted against them.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items: run DefaultInit only when a UDR initializer will be used,
    // then initialize element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar/aggregate with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private declaration's own (non-trivial) initializer
    // when DefaultInit did not already take care of it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
needCleanups(unsigned N)918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
emitCleanups(CodeGenFunction & CGF,unsigned N,Address PrivateAddr)926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
/// Follow a chain of pointers/references starting at \p BaseLV, loading
/// through each level until reaching the element type \p ElTy, and return an
/// lvalue for that innermost storage cast to ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel one pointer/reference level per iteration until the remaining type
  // matches the element type.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: reinterpret the current address as holding BaseTy,
      // then load through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Re-type the final address as ElTy's memory type, preserving the lvalue's
  // base info and TBAA.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
958 
/// Rebuild the pointer/reference indirection structure of \p BaseTy around the
/// raw address \p Addr: for each pointer/reference level down to \p ElTy a
/// temporary is allocated and chained to the next, \p Addr is stored into the
/// innermost one, and the outermost temporary is returned. With no levels to
/// rebuild, \p Addr is returned directly (cast to \p BaseLVType).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // previous temporary to chain into
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link: the previous level's temporary points at this one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the actual address into the innermost temporary and hand back the
    // head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
getBaseDecl(const Expr * Ref,const DeclRefExpr * & DE)987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// Adjust the private copy's address for reduction item N so it corresponds
/// to the same offset within the base variable as the shared item does (this
/// matters when the reduction item is an array section/subscript into a
/// larger variable). Also records the item's base declaration.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through pointers/references down to the element storage of the
    // base variable.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) of the base relative to the shared item; applying
    // it to the private pointer reproduces the same layout privately.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    // Wrap the adjusted pointer back in the base variable's indirection
    // structure.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
usesReductionInitializer(unsigned N) const1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
/// Return an lvalue for the thread-id value: the thread-id variable is a
/// pointer parameter here, so load through it.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1045 
/// Emit the body of an OpenMP region inside a terminate scope, so that any
/// exception escaping the structured block terminates instead of unwinding
/// out of the region.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1060 
/// Return an lvalue for the thread-id value: in a task outlined region the
/// thread-id variable is a plain local (kmp_int32), not a pointer, so its
/// address is used directly.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1067 
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
/// Construct the OpenMP runtime codegen support object: sets up the critical
/// name type, initializes the OpenMPIRBuilder, and loads any offload-entry
/// metadata from a host IR file (for device compilation).
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an array of 8 i32s in the KMP runtime ABI.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1089 
clear()1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
getName(ArrayRef<StringRef> Parts) const1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emit the outlined combiner or initializer function for a user-defined
/// ('declare reduction') reduction.
/// \param Ty The reduction item type.
/// \param CombinerInitializer The combiner expression, or (for initializers)
/// the call-style init expression; may be null for direct-init initializers,
/// in which case only Out's own initializer is emitted.
/// \param In Declaration standing for omp_in (or omp_orig).
/// \param Out Declaration standing for omp_out (or omp_priv).
/// \param IsCombiner Selects the function name and suppresses Out's
/// default-init emission for combiners.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // These helpers are tiny; when optimizing, force-inline them at call sites.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first apply omp_priv's own non-trivial initializer (the
  // direct-init form), then any call-style init expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
/// Emit (once) the combiner and, if present, the initializer function for a
/// user-defined reduction \p D, caching them in UDRMap. When emitted while
/// generating \p CGF, the UDR is also associated with that function so its
/// entry can be dropped when the function is finished.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct-init UDRs the init expression lives on omp_priv itself, so
    // no call-style expression is passed down.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl * D)1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Push a finalization callback for \p Kind onto \p OMPBuilder's stack (a
  /// no-op when no builder is provided); the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Redirect codegen to the finalization point and branch through any
      // pending cleanups toward the cancellation destination.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder whose finalization stack is managed; null means "do nothing".
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1252 
emitParallelOrTeamsOutlinedFunction(CodeGenModule & CGM,const OMPExecutableDirective & D,const CapturedStmt * CS,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const StringRef OutlinedHelperName,const RegionCodeGenTy & CodeGen)1253 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1254     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1255     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1256     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1257   assert(ThreadIDVar->getType()->isPointerType() &&
1258          "thread id variable must be of type kmp_int32 *");
1259   CodeGenFunction CGF(CGM, true);
1260   bool HasCancel = false;
1261   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1262     HasCancel = OPD->hasCancel();
1263   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1266     HasCancel = OPSD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD =
1274                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1275     HasCancel = OPFD->hasCancel();
1276   else if (const auto *OPFD =
1277                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1278     HasCancel = OPFD->hasCancel();
1279 
1280   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1281   //       parallel region to make cancellation barriers work properly.
1282   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1283   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1285                                     HasCancel, OutlinedHelperName);
1286   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1288 }
1289 
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
emitTeamsOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Emit the outlined function for a task region. For untied tasks, also
/// installs an action that splits the body into parts and emits the
/// re-scheduling call; the number of generated parts is reported back through
/// \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-schedule the task by calling
  // __kmpc_omp_task(loc, tid, task_t) with the task descriptor loaded from
  // TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  // The action is attached to the region codegen; for tied tasks it is
  // constructed with Tied == true and the untied re-scheduling is not used.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-class directives capture their statement under OMPD_taskloop,
  // all other task-like directives under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the directive contains a 'cancel' construct; only these
  // four directive kinds track cancellation here.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  // Generate the outlined function body in a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Only untied tasks are split into parts; report the count to the caller.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
buildStructValue(ConstantStructBuilder & Fields,CodeGenModule & CGM,const RecordDecl * RD,const CGRecordLayout & RL,ArrayRef<llvm::Constant * > Data)1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule & CGM,QualType Ty,bool IsConstant,ArrayRef<llvm::Constant * > Data,const Twine & Name,As &&...Args)1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375                    As &&... Args) {
1376   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378   ConstantInitBuilder CIBuilder(CGM);
1379   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380   buildStructValue(Fields, CGM, RD, RL, Data);
1381   return Fields.finishAndCreateGlobal(
1382       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383       std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
createConstantGlobalStructAndAddToParent(CodeGenModule & CGM,QualType Ty,ArrayRef<llvm::Constant * > Data,T & Parent)1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1389                                          ArrayRef<llvm::Constant *> Data,
1390                                          T &Parent) {
1391   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394   buildStructValue(Fields, CGM, RD, RL, Data);
1395   Fields.finishAndAddTo(Parent);
1396 }
1397 
setLocThreadIdInsertPt(CodeGenFunction & CGF,bool AtCurrentPoint)1398 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1399                                              bool AtCurrentPoint) {
1400   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1401   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1402 
1403   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1404   if (AtCurrentPoint) {
1405     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1406         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1407   } else {
1408     Elem.second.ServiceInsertPt =
1409         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1410     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1411   }
1412 }
1413 
clearLocThreadIdInsertPt(CodeGenFunction & CGF)1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
getIdentStringFromSourceLocation(CodeGenFunction & CGF,SourceLocation Loc,SmallString<128> & Buffer)1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
emitUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,unsigned Flags)1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1452                                                 Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
/// Return the OpenMP thread id for the current function, either by reusing an
/// outlined region's thread-id argument or by emitting (and caching) a call to
/// __kmpc_global_thread_num at the service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the argument when either exceptions cannot unwind past this
      // point, or the load happens in (or its address lives in) the entry
      // block or the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point so it dominates all uses;
  // the guard restores the builder's position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
functionFinished(CodeGenFunction & CGF)1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
/// Return the pointer-to-ident_t type used for the 'loc' parameter of OpenMP
/// runtime calls; the type is owned by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1551 
getKmpc_MicroPointerTy()1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
createForStaticInitFunction(unsigned IVSize,bool IVSigned)1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1564   assert((IVSize == 32 || IVSize == 64) &&
1565          "IV size is not compatible with the omp runtime");
1566   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567                                             : "__kmpc_for_static_init_4u")
1568                                 : (IVSigned ? "__kmpc_for_static_init_8"
1569                                             : "__kmpc_for_static_init_8u");
1570   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1571   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1572   llvm::Type *TypeParams[] = {
1573     getIdentTyPointerTy(),                     // loc
1574     CGM.Int32Ty,                               // tid
1575     CGM.Int32Ty,                               // schedtype
1576     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1577     PtrTy,                                     // p_lower
1578     PtrTy,                                     // p_upper
1579     PtrTy,                                     // p_stride
1580     ITy,                                       // incr
1581     ITy                                        // chunk
1582   };
1583   auto *FnTy =
1584       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1585   return CGM.CreateRuntimeFunction(FnTy, Name);
1586 }
1587 
1588 llvm::FunctionCallee
createDispatchInitFunction(unsigned IVSize,bool IVSigned)1589 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1590   assert((IVSize == 32 || IVSize == 64) &&
1591          "IV size is not compatible with the omp runtime");
1592   StringRef Name =
1593       IVSize == 32
1594           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1595           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1596   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1597   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1598                                CGM.Int32Ty,           // tid
1599                                CGM.Int32Ty,           // schedtype
1600                                ITy,                   // lower
1601                                ITy,                   // upper
1602                                ITy,                   // stride
1603                                ITy                    // chunk
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
createDispatchFiniFunction(unsigned IVSize,bool IVSigned)1611 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1617           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1618   llvm::Type *TypeParams[] = {
1619       getIdentTyPointerTy(), // loc
1620       CGM.Int32Ty,           // tid
1621   };
1622   auto *FnTy =
1623       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1624   return CGM.CreateRuntimeFunction(FnTy, Name);
1625 }
1626 
1627 llvm::FunctionCallee
createDispatchNextFunction(unsigned IVSize,bool IVSigned)1628 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1629   assert((IVSize == 32 || IVSize == 64) &&
1630          "IV size is not compatible with the omp runtime");
1631   StringRef Name =
1632       IVSize == 32
1633           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1634           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1635   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1636   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1637   llvm::Type *TypeParams[] = {
1638     getIdentTyPointerTy(),                     // loc
1639     CGM.Int32Ty,                               // tid
1640     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1641     PtrTy,                                     // p_lower
1642     PtrTy,                                     // p_upper
1643     PtrTy                                      // p_stride
1644   };
1645   auto *FnTy =
1646       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1647   return CGM.CreateRuntimeFunction(FnTy, Name);
1648 }
1649 
1650 /// Obtain information that uniquely identifies a target entry. This
1651 /// consists of the file and device IDs as well as line number associated with
1652 /// the relevant entry source location.
getTargetEntryUniqueInfo(ASTContext & C,SourceLocation Loc,unsigned & DeviceID,unsigned & FileID,unsigned & LineNum)1653 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1654                                      unsigned &DeviceID, unsigned &FileID,
1655                                      unsigned &LineNum) {
1656   SourceManager &SM = C.getSourceManager();
1657 
1658   // The loc should be always valid and have a file ID (the user cannot use
1659   // #pragma directives in macros)
1660 
1661   assert(Loc.isValid() && "Source location is expected to be always valid.");
1662 
1663   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1664   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1665 
1666   llvm::sys::fs::UniqueID ID;
1667   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1668     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1669     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1671       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1672           << PLoc.getFilename() << EC.message();
1673   }
1674 
1675   DeviceID = ID.getDevice();
1676   FileID = ID.getFile();
1677   LineNum = PLoc.getLine();
1678 }
1679 
/// Return the address of the reference pointer created for a 'declare target
/// link' variable (or a 'to' variable under unified shared memory), creating
/// and registering the pointer global on first use. Returns an invalid address
/// for all other variables.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // No device code is generated under -fopenmp-simd.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the name "<mangled>[_<fileid>]_decl_tgt_ref_ptr"; the file id is
    // mixed in for internal-linkage variables to keep the name unique across
    // translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the pointer global and register it for offloading.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's address;
      // on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1718 
1719 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1720 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1721   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1722          !CGM.getContext().getTargetInfo().isTLSSupported());
1723   // Lookup the entry, lazily creating it if necessary.
1724   std::string Suffix = getName({"cache", ""});
1725   return getOrCreateInternalVariable(
1726       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1727 }
1728 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1729 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1730                                                 const VarDecl *VD,
1731                                                 Address VDAddr,
1732                                                 SourceLocation Loc) {
1733   if (CGM.getLangOpts().OpenMPUseTLS &&
1734       CGM.getContext().getTargetInfo().isTLSSupported())
1735     return VDAddr;
1736 
1737   llvm::Type *VarTy = VDAddr.getElementType();
1738   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1739                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1740                                                        CGM.Int8PtrTy),
1741                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1742                          getOrCreateThreadPrivateCache(VD)};
1743   return Address(CGF.EmitRuntimeCall(
1744                      OMPBuilder.getOrCreateRuntimeFunction(
1745                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1746                      Args),
1747                  VDAddr.getAlignment());
1748 }
1749 
emitThreadPrivateVarInit(CodeGenFunction & CGF,Address VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)1750 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1751     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1752     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1753   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1754   // library.
1755   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1756   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1757                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1758                       OMPLoc);
1759   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1760   // to register constructor/destructor for variable.
1761   llvm::Value *Args[] = {
1762       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1763       Ctor, CopyCtor, Dtor};
1764   CGF.EmitRuntimeCall(
1765       OMPBuilder.getOrCreateRuntimeFunction(
1766           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1767       Args);
1768 }
1769 
/// Emit the ctor/dtor registration for a threadprivate variable definition.
/// When \p CGF is null, a standalone "__omp_threadprivate_init_" function is
/// created (and returned) to perform the registration; otherwise the
/// registration is emitted into \p CGF and nullptr is returned. No-op when TLS
/// is used, when \p VD has no definition, or when it was already processed.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each definition only once, keyed by mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. The runtime passes the copy's
      // address as a void* argument and expects it returned.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the incoming pointer, as the runtime contract requires.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are filled with null function pointers of the
    // expected signature.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated global
      // initialization function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1889 
// Emit the offloading ctor/dtor entries for a 'declare target' variable
// definition. Returns true iff compiling for the device (in which case the
// caller must not emit the host-side definition of the variable).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do unless we are compiling for a device or the host build has
  // at least one offloading target.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables, and 'to' variables under unified shared memory, do not
  // get ctor/dtor offload entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Suppress debug locations for the prologue, then give the body an
      // artificial location (there is no user code to point at).
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; it is only referenced from the offload tables.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed so host and device
      // offload entries stay in sync.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; it is only referenced from the offload tables.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2004 
getAddrOfArtificialThreadPrivate(CodeGenFunction & CGF,QualType VarType,StringRef Name)2005 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2006                                                           QualType VarType,
2007                                                           StringRef Name) {
2008   std::string Suffix = getName({"artificial", ""});
2009   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2010   llvm::Value *GAddr =
2011       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2012   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2013       CGM.getTarget().isTLSSupported()) {
2014     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2015     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2016   }
2017   std::string CacheSuffix = getName({"cache", ""});
2018   llvm::Value *Args[] = {
2019       emitUpdateLocation(CGF, SourceLocation()),
2020       getThreadID(CGF, SourceLocation()),
2021       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2022       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2023                                 /*isSigned=*/false),
2024       getOrCreateInternalVariable(
2025           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2026   return Address(
2027       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2028           CGF.EmitRuntimeCall(
2029               OMPBuilder.getOrCreateRuntimeFunction(
2030                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2031               Args),
2032           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2033       CGM.getContext().getTypeAlignInChars(VarType));
2034 }
2035 
emitIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)2036 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2037                                    const RegionCodeGenTy &ThenGen,
2038                                    const RegionCodeGenTy &ElseGen) {
2039   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2040 
2041   // If the condition constant folds and can be elided, try to avoid emitting
2042   // the condition and the dead arm of the if/else.
2043   bool CondConstant;
2044   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2045     if (CondConstant)
2046       ThenGen(CGF);
2047     else
2048       ElseGen(CGF);
2049     return;
2050   }
2051 
2052   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2053   // emit the conditional branch.
2054   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2055   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2056   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2057   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2058 
2059   // Emit the 'then' code.
2060   CGF.EmitBlock(ThenBlock);
2061   ThenGen(CGF);
2062   CGF.EmitBranch(ContBlock);
2063   // Emit the 'else' code if present.
2064   // There is no need to emit line number for unconditional branch.
2065   (void)ApplyDebugLocation::CreateEmpty(CGF);
2066   CGF.EmitBlock(ElseBlock);
2067   ElseGen(CGF);
2068   // There is no need to emit line number for unconditional branch.
2069   (void)ApplyDebugLocation::CreateEmpty(CGF);
2070   CGF.EmitBranch(ContBlock);
2071   // Emit the continuation block for code after the if.
2072   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2073 }
2074 
// Emit the call sequence for an OpenMP 'parallel' region: either a real
// __kmpc_fork_call of OutlinedFn, or (when an 'if' clause evaluates false) a
// serialized execution of OutlinedFn bracketed by
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: fork the outlined function through the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    // The captured variables are passed as trailing varargs.
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run OutlinedFn inline on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause both arms are emitted behind a runtime branch;
  // otherwise only the parallel path is emitted.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2145 
2146 // If we're inside an (outlined) parallel region, use the region info's
2147 // thread-ID variable (it is passed in a first argument of the outlined function
2148 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2149 // regular serial code region, get thread ID by calling kmp_int32
2150 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2151 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)2152 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2153                                              SourceLocation Loc) {
2154   if (auto *OMPRegionInfo =
2155           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2156     if (OMPRegionInfo->getThreadIDVariable())
2157       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2158 
2159   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2160   QualType Int32Ty =
2161       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2162   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2163   CGF.EmitStoreOfScalar(ThreadID,
2164                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2165 
2166   return ThreadIDTemp;
2167 }
2168 
// Return the module-internal global variable of type \p Ty registered under
// \p Name, creating it on first request. The same name must always be
// requested with the same type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the twine into a stable buffer to use as the map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace either inserts a null placeholder or finds the cached entry.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: the previously created global must match the requested type.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Cache miss: create a zero-initialized, common-linkage global named after
  // the map key (Elem.first() is the interned copy of RuntimeName) and store
  // it into the placeholder slot.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2188 
getCriticalRegionLock(StringRef CriticalName)2189 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2190   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2191   std::string Name = getName({Prefix, "var"});
2192   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2193 }
2194 
2195 namespace {
2196 /// Common pre(post)-action for different OpenMP constructs.
2197 class CommonActionTy final : public PrePostActionTy {
2198   llvm::FunctionCallee EnterCallee;
2199   ArrayRef<llvm::Value *> EnterArgs;
2200   llvm::FunctionCallee ExitCallee;
2201   ArrayRef<llvm::Value *> ExitArgs;
2202   bool Conditional;
2203   llvm::BasicBlock *ContBlock = nullptr;
2204 
2205 public:
CommonActionTy(llvm::FunctionCallee EnterCallee,ArrayRef<llvm::Value * > EnterArgs,llvm::FunctionCallee ExitCallee,ArrayRef<llvm::Value * > ExitArgs,bool Conditional=false)2206   CommonActionTy(llvm::FunctionCallee EnterCallee,
2207                  ArrayRef<llvm::Value *> EnterArgs,
2208                  llvm::FunctionCallee ExitCallee,
2209                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2210       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2211         ExitArgs(ExitArgs), Conditional(Conditional) {}
Enter(CodeGenFunction & CGF)2212   void Enter(CodeGenFunction &CGF) override {
2213     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2214     if (Conditional) {
2215       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2216       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2217       ContBlock = CGF.createBasicBlock("omp_if.end");
2218       // Generate the branch (If-stmt)
2219       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2220       CGF.EmitBlock(ThenBlock);
2221     }
2222   }
Done(CodeGenFunction & CGF)2223   void Done(CodeGenFunction &CGF) {
2224     // Emit the rest of blocks/branches
2225     CGF.EmitBranch(ContBlock);
2226     CGF.EmitBlock(ContBlock, true);
2227   }
Exit(CodeGenFunction & CGF)2228   void Exit(CodeGenFunction &CGF) override {
2229     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2230   }
2231 };
2232 } // anonymous namespace
2233 
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)2234 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2235                                          StringRef CriticalName,
2236                                          const RegionCodeGenTy &CriticalOpGen,
2237                                          SourceLocation Loc, const Expr *Hint) {
2238   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2239   // CriticalOpGen();
2240   // __kmpc_end_critical(ident_t *, gtid, Lock);
2241   // Prepare arguments and build a call to __kmpc_critical
2242   if (!CGF.HaveInsertPoint())
2243     return;
2244   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2245                          getCriticalRegionLock(CriticalName)};
2246   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2247                                                 std::end(Args));
2248   if (Hint) {
2249     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2250         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2251   }
2252   CommonActionTy Action(
2253       OMPBuilder.getOrCreateRuntimeFunction(
2254           CGM.getModule(),
2255           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2256       EnterArgs,
2257       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2258                                             OMPRTL___kmpc_end_critical),
2259       Args);
2260   CriticalOpGen.setAction(Action);
2261   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2262 }
2263 
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)2264 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2265                                        const RegionCodeGenTy &MasterOpGen,
2266                                        SourceLocation Loc) {
2267   if (!CGF.HaveInsertPoint())
2268     return;
2269   // if(__kmpc_master(ident_t *, gtid)) {
2270   //   MasterOpGen();
2271   //   __kmpc_end_master(ident_t *, gtid);
2272   // }
2273   // Prepare arguments and build a call to __kmpc_master
2274   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276                             CGM.getModule(), OMPRTL___kmpc_master),
2277                         Args,
2278                         OMPBuilder.getOrCreateRuntimeFunction(
2279                             CGM.getModule(), OMPRTL___kmpc_end_master),
2280                         Args,
2281                         /*Conditional=*/true);
2282   MasterOpGen.setAction(Action);
2283   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2284   Action.Done(CGF);
2285 }
2286 
emitMaskedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MaskedOpGen,SourceLocation Loc,const Expr * Filter)2287 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2288                                        const RegionCodeGenTy &MaskedOpGen,
2289                                        SourceLocation Loc, const Expr *Filter) {
2290   if (!CGF.HaveInsertPoint())
2291     return;
2292   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2293   //   MaskedOpGen();
2294   //   __kmpc_end_masked(iden_t *, gtid);
2295   // }
2296   // Prepare arguments and build a call to __kmpc_masked
2297   llvm::Value *FilterVal = Filter
2298                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2299                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2300   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2301                          FilterVal};
2302   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2303                             getThreadID(CGF, Loc)};
2304   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2305                             CGM.getModule(), OMPRTL___kmpc_masked),
2306                         Args,
2307                         OMPBuilder.getOrCreateRuntimeFunction(
2308                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2309                         ArgsEnd,
2310                         /*Conditional=*/true);
2311   MaskedOpGen.setAction(Action);
2312   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2313   Action.Done(CGF);
2314 }
2315 
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)2316 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2317                                         SourceLocation Loc) {
2318   if (!CGF.HaveInsertPoint())
2319     return;
2320   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2321     OMPBuilder.createTaskyield(CGF.Builder);
2322   } else {
2323     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2324     llvm::Value *Args[] = {
2325         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2326         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2327     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2328                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2329                         Args);
2330   }
2331 
2332   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2333     Region->emitUntiedSwitch(CGF);
2334 }
2335 
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)2336 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2337                                           const RegionCodeGenTy &TaskgroupOpGen,
2338                                           SourceLocation Loc) {
2339   if (!CGF.HaveInsertPoint())
2340     return;
2341   // __kmpc_taskgroup(ident_t *, gtid);
2342   // TaskgroupOpGen();
2343   // __kmpc_end_taskgroup(ident_t *, gtid);
2344   // Prepare arguments and build a call to __kmpc_taskgroup
2345   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2346   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2347                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2348                         Args,
2349                         OMPBuilder.getOrCreateRuntimeFunction(
2350                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2351                         Args);
2352   TaskgroupOpGen.setAction(Action);
2353   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2354 }
2355 
2356 /// Given an array of pointers to variables, project the address of a
2357 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)2358 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2359                                       unsigned Index, const VarDecl *Var) {
2360   // Pull out the pointer to the variable.
2361   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2362   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2363 
2364   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2365   Addr = CGF.Builder.CreateElementBitCast(
2366       Addr, CGF.ConvertTypeForMem(Var->getType()));
2367   return Addr;
2368 }
2369 
// Build the helper passed to __kmpc_copyprivate:
//   void copy_func(void *LHSArg, void *RHSArg);
// Both arguments are arrays of void* (one slot per copyprivate variable);
// for each variable the body performs the copy described by the matching
// assignment expression: *(Type_i *)Dst[i] = *(Type_i *)Src[i].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // EmitOMPCopy emits the copy/assignment for this variable's type.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2423 
// Emit a 'single' region, including the 'copyprivate' broadcast of values
// produced inside the region to the other threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Emitted pattern:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // did_it records whether this thread executed the single region, so
    // __kmpc_copyprivate knows which thread broadcasts.
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // Still inside the guarded block: this thread ran the region.
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2511 
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2512 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2513                                         const RegionCodeGenTy &OrderedOpGen,
2514                                         SourceLocation Loc, bool IsThreads) {
2515   if (!CGF.HaveInsertPoint())
2516     return;
2517   // __kmpc_ordered(ident_t *, gtid);
2518   // OrderedOpGen();
2519   // __kmpc_end_ordered(ident_t *, gtid);
2520   // Prepare arguments and build a call to __kmpc_ordered
2521   if (IsThreads) {
2522     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2523     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2524                               CGM.getModule(), OMPRTL___kmpc_ordered),
2525                           Args,
2526                           OMPBuilder.getOrCreateRuntimeFunction(
2527                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2528                           Args);
2529     OrderedOpGen.setAction(Action);
2530     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2531     return;
2532   }
2533   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2534 }
2535 
getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)2536 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2537   unsigned Flags;
2538   if (Kind == OMPD_for)
2539     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2540   else if (Kind == OMPD_sections)
2541     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2542   else if (Kind == OMPD_single)
2543     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2544   else if (Kind == OMPD_barrier)
2545     Flags = OMP_IDENT_BARRIER_EXPL;
2546   else
2547     Flags = OMP_IDENT_BARRIER_IMPL;
2548   return Flags;
2549 }
2550 
getDefaultScheduleAndChunk(CodeGenFunction & CGF,const OMPLoopDirective & S,OpenMPScheduleClauseKind & ScheduleKind,const Expr * & ChunkExpr) const2551 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2552     CodeGenFunction &CGF, const OMPLoopDirective &S,
2553     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2554   // Check if the loop directive is actually a doacross loop directive. In this
2555   // case choose static, 1 schedule.
2556   if (llvm::any_of(
2557           S.getClausesOfKind<OMPOrderedClause>(),
2558           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2559     ScheduleKind = OMPC_SCHEDULE_static;
2560     // Chunk size is 1 in this case.
2561     llvm::APInt ChunkSize(32, 1);
2562     ChunkExpr = IntegerLiteral::Create(
2563         CGF.getContext(), ChunkSize,
2564         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2565         SourceLocation());
2566   }
2567 }
2568 
/// Emit an OpenMP barrier at \p Loc for the construct \p Kind. If the
/// enclosing region is cancellable (and \p ForceSimpleCall is false), a
/// cancellable barrier is emitted instead, optionally followed by the
/// cancellation-check branch when \p EmitChecks is set.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate the whole barrier emission to the OpenMPIRBuilder.
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // The region may be cancelled: use the cancellable barrier, whose
      // non-zero result signals that cancellation was requested.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Simple (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2618 
2619 /// Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)2620 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2621                                           bool Chunked, bool Ordered) {
2622   switch (ScheduleKind) {
2623   case OMPC_SCHEDULE_static:
2624     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2625                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2626   case OMPC_SCHEDULE_dynamic:
2627     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2628   case OMPC_SCHEDULE_guided:
2629     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2630   case OMPC_SCHEDULE_runtime:
2631     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2632   case OMPC_SCHEDULE_auto:
2633     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2634   case OMPC_SCHEDULE_unknown:
2635     assert(!Chunked && "chunk was specified but schedule kind not known");
2636     return Ordered ? OMP_ord_static : OMP_sch_static;
2637   }
2638   llvm_unreachable("Unexpected runtime schedule");
2639 }
2640 
2641 /// Map the OpenMP distribute schedule to the runtime enumeration.
2642 static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked)2643 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2644   // only static is allowed for dist_schedule
2645   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2646 }
2647 
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2648 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2649                                          bool Chunked) const {
2650   OpenMPSchedType Schedule =
2651       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2652   return Schedule == OMP_sch_static;
2653 }
2654 
isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2655 bool CGOpenMPRuntime::isStaticNonchunked(
2656     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2657   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2658   return Schedule == OMP_dist_sch_static;
2659 }
2660 
isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2661 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2662                                       bool Chunked) const {
2663   OpenMPSchedType Schedule =
2664       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2665   return Schedule == OMP_sch_static_chunked;
2666 }
2667 
isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2668 bool CGOpenMPRuntime::isStaticChunked(
2669     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2670   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2671   return Schedule == OMP_dist_sch_static_chunked;
2672 }
2673 
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const2674 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2675   OpenMPSchedType Schedule =
2676       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2677   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2678   return Schedule != OMP_sch_static;
2679 }
2680 
addMonoNonMonoModifier(CodeGenModule & CGM,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2)2681 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2682                                   OpenMPScheduleClauseModifier M1,
2683                                   OpenMPScheduleClauseModifier M2) {
2684   int Modifier = 0;
2685   switch (M1) {
2686   case OMPC_SCHEDULE_MODIFIER_monotonic:
2687     Modifier = OMP_sch_modifier_monotonic;
2688     break;
2689   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2690     Modifier = OMP_sch_modifier_nonmonotonic;
2691     break;
2692   case OMPC_SCHEDULE_MODIFIER_simd:
2693     if (Schedule == OMP_sch_static_chunked)
2694       Schedule = OMP_sch_static_balanced_chunked;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_last:
2697   case OMPC_SCHEDULE_MODIFIER_unknown:
2698     break;
2699   }
2700   switch (M2) {
2701   case OMPC_SCHEDULE_MODIFIER_monotonic:
2702     Modifier = OMP_sch_modifier_monotonic;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2705     Modifier = OMP_sch_modifier_nonmonotonic;
2706     break;
2707   case OMPC_SCHEDULE_MODIFIER_simd:
2708     if (Schedule == OMP_sch_static_chunked)
2709       Schedule = OMP_sch_static_balanced_chunked;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_last:
2712   case OMPC_SCHEDULE_MODIFIER_unknown:
2713     break;
2714   }
2715   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2716   // If the static schedule kind is specified or if the ordered clause is
2717   // specified, and if the nonmonotonic modifier is not specified, the effect is
2718   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2719   // modifier is specified, the effect is as if the nonmonotonic modifier is
2720   // specified.
2721   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2722     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2723           Schedule == OMP_sch_static_balanced_chunked ||
2724           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2725           Schedule == OMP_dist_sch_static_chunked ||
2726           Schedule == OMP_dist_sch_static))
2727       Modifier = OMP_sch_modifier_nonmonotonic;
2728   }
2729   return Schedule | Modifier;
2730 }
2731 
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,const DispatchRTInput & DispatchValues)2732 void CGOpenMPRuntime::emitForDispatchInit(
2733     CodeGenFunction &CGF, SourceLocation Loc,
2734     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2735     bool Ordered, const DispatchRTInput &DispatchValues) {
2736   if (!CGF.HaveInsertPoint())
2737     return;
2738   OpenMPSchedType Schedule = getRuntimeSchedule(
2739       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2740   assert(Ordered ||
2741          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2742           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2743           Schedule != OMP_sch_static_balanced_chunked));
2744   // Call __kmpc_dispatch_init(
2745   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2746   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2747   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2748 
2749   // If the Chunk was not specified in the clause - use default value 1.
2750   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2751                                             : CGF.Builder.getIntN(IVSize, 1);
2752   llvm::Value *Args[] = {
2753       emitUpdateLocation(CGF, Loc),
2754       getThreadID(CGF, Loc),
2755       CGF.Builder.getInt32(addMonoNonMonoModifier(
2756           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2757       DispatchValues.LB,                                     // Lower
2758       DispatchValues.UB,                                     // Upper
2759       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2760       Chunk                                                  // Chunk
2761   };
2762   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2763 }
2764 
/// Emit the call to the selected __kmpc_for_static_init_* runtime entry for a
/// statically scheduled (never 'ordered') worksharing region. The schedule
/// encoding is validated against the presence of a chunk expression before
/// the call is built.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Only the static schedule encodings are legal here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only valid for the non-chunked encodings.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2813 
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)2814 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2815                                         SourceLocation Loc,
2816                                         OpenMPDirectiveKind DKind,
2817                                         const OpenMPScheduleTy &ScheduleKind,
2818                                         const StaticRTInput &Values) {
2819   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2820       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2821   assert(isOpenMPWorksharingDirective(DKind) &&
2822          "Expected loop-based or sections-based directive.");
2823   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2824                                              isOpenMPLoopDirective(DKind)
2825                                                  ? OMP_IDENT_WORK_LOOP
2826                                                  : OMP_IDENT_WORK_SECTIONS);
2827   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2828   llvm::FunctionCallee StaticInitFunction =
2829       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2830   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2831   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2832                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2833 }
2834 
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const CGOpenMPRuntime::StaticRTInput & Values)2835 void CGOpenMPRuntime::emitDistributeStaticInit(
2836     CodeGenFunction &CGF, SourceLocation Loc,
2837     OpenMPDistScheduleClauseKind SchedKind,
2838     const CGOpenMPRuntime::StaticRTInput &Values) {
2839   OpenMPSchedType ScheduleNum =
2840       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2841   llvm::Value *UpdatedLocation =
2842       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2843   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2844   llvm::FunctionCallee StaticInitFunction =
2845       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2846   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2847                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2848                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2849 }
2850 
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)2851 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2852                                           SourceLocation Loc,
2853                                           OpenMPDirectiveKind DKind) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {
2858       emitUpdateLocation(CGF, Loc,
2859                          isOpenMPDistributeDirective(DKind)
2860                              ? OMP_IDENT_WORK_DISTRIBUTE
2861                              : isOpenMPLoopDirective(DKind)
2862                                    ? OMP_IDENT_WORK_LOOP
2863                                    : OMP_IDENT_WORK_SECTIONS),
2864       getThreadID(CGF, Loc)};
2865   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2866   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2867                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2868                       Args);
2869 }
2870 
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)2871 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2872                                                  SourceLocation Loc,
2873                                                  unsigned IVSize,
2874                                                  bool IVSigned) {
2875   if (!CGF.HaveInsertPoint())
2876     return;
2877   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2878   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2879   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2880 }
2881 
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)2882 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2883                                           SourceLocation Loc, unsigned IVSize,
2884                                           bool IVSigned, Address IL,
2885                                           Address LB, Address UB,
2886                                           Address ST) {
2887   // Call __kmpc_dispatch_next(
2888   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2889   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2890   //          kmp_int[32|64] *p_stride);
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc),
2893       getThreadID(CGF, Loc),
2894       IL.getPointer(), // &isLastIter
2895       LB.getPointer(), // &Lower
2896       UB.getPointer(), // &Upper
2897       ST.getPointer()  // &Stride
2898   };
2899   llvm::Value *Call =
2900       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2901   return CGF.EmitScalarConversion(
2902       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2903       CGF.getContext().BoolTy, Loc);
2904 }
2905 
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)2906 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2907                                            llvm::Value *NumThreads,
2908                                            SourceLocation Loc) {
2909   if (!CGF.HaveInsertPoint())
2910     return;
2911   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2912   llvm::Value *Args[] = {
2913       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2914       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2915   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2916                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2917                       Args);
2918 }
2919 
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)2920 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2921                                          ProcBindKind ProcBind,
2922                                          SourceLocation Loc) {
2923   if (!CGF.HaveInsertPoint())
2924     return;
2925   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2926   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2927   llvm::Value *Args[] = {
2928       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2929       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2930   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2931                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2932                       Args);
2933 }
2934 
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc,llvm::AtomicOrdering AO)2935 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2936                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2937   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2938     OMPBuilder.createFlush(CGF.Builder);
2939   } else {
2940     if (!CGF.HaveInsertPoint())
2941       return;
2942     // Build call void __kmpc_flush(ident_t *loc)
2943     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2944                             CGM.getModule(), OMPRTL___kmpc_flush),
2945                         emitUpdateLocation(CGF, Loc));
2946   }
2947 }
2948 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these ordinals index into the kmp_task_t record built
/// elsewhere in this file — the order must stay in sync with that layout.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2974 
empty() const2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2976   return OffloadEntriesTargetRegion.empty() &&
2977          OffloadEntriesDeviceGlobalVar.empty();
2978 }
2979 
2980 /// Initialize target region entry.
2981 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,unsigned Order)2982     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2983                                     StringRef ParentName, unsigned LineNum,
2984                                     unsigned Order) {
2985   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2986                                              "only required for the device "
2987                                              "code generation.");
2988   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2989       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2990                                    OMPTargetRegionEntryTargetRegion);
2991   ++OffloadingEntriesNum;
2992 }
2993 
/// Register the address/ID/flags of a target region entry keyed by
/// (DeviceID, FileID, ParentName, LineNum).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: silently ignore a target-region entry that already exists
    // when matched on everything but the address/ID.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    // On the host the entry is created here and assigned the next ordinal.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3023 
hasTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,bool IgnoreAddressId) const3024 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3025     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3026     bool IgnoreAddressId) const {
3027   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3028   if (PerDevice == OffloadEntriesTargetRegion.end())
3029     return false;
3030   auto PerFile = PerDevice->second.find(FileID);
3031   if (PerFile == PerDevice->second.end())
3032     return false;
3033   auto PerParentName = PerFile->second.find(ParentName);
3034   if (PerParentName == PerFile->second.end())
3035     return false;
3036   auto PerLine = PerParentName->second.find(LineNum);
3037   if (PerLine == PerParentName->second.end())
3038     return false;
3039   // Fail if this entry is already registered.
3040   if (!IgnoreAddressId &&
3041       (PerLine->second.getAddress() || PerLine->second.getID()))
3042     return false;
3043   return true;
3044 }
3045 
actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy & Action)3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3047     const OffloadTargetRegionEntryInfoActTy &Action) {
3048   // Scan all target region entries and perform the provided action.
3049   for (const auto &D : OffloadEntriesTargetRegion)
3050     for (const auto &F : D.second)
3051       for (const auto &P : F.second)
3052         for (const auto &L : P.second)
3053           Action(D.first, F.first, P.first(), L.first, L.second);
3054 }
3055 
3056 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeDeviceGlobalVarEntryInfo(StringRef Name,OMPTargetGlobalVarEntryKind Flags,unsigned Order)3057     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3058                                        OMPTargetGlobalVarEntryKind Flags,
3059                                        unsigned Order) {
3060   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3061                                              "only required for the device "
3062                                              "code generation.");
3063   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3064   ++OffloadingEntriesNum;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerDeviceGlobalVarEntryInfo(StringRef VarName,llvm::Constant * Addr,CharUnits VarSize,OMPTargetGlobalVarEntryKind Flags,llvm::GlobalValue::LinkageTypes Linkage)3068     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3069                                      CharUnits VarSize,
3070                                      OMPTargetGlobalVarEntryKind Flags,
3071                                      llvm::GlobalValue::LinkageTypes Linkage) {
3072   if (CGM.getLangOpts().OpenMPIsDevice) {
3073     // This could happen if the device compilation is invoked standalone.
3074     if (!hasDeviceGlobalVarEntryInfo(VarName))
3075       return;
3076     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3077     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3078       if (Entry.getVarSize().isZero()) {
3079         Entry.setVarSize(VarSize);
3080         Entry.setLinkage(Linkage);
3081       }
3082       return;
3083     }
3084     Entry.setVarSize(VarSize);
3085     Entry.setLinkage(Linkage);
3086     Entry.setAddress(Addr);
3087   } else {
3088     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3089       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3090       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3091              "Entry not initialized!");
3092       if (Entry.getVarSize().isZero()) {
3093         Entry.setVarSize(VarSize);
3094         Entry.setLinkage(Linkage);
3095       }
3096       return;
3097     }
3098     OffloadEntriesDeviceGlobalVar.try_emplace(
3099         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3100     ++OffloadingEntriesNum;
3101   }
3102 }
3103 
3104 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy & Action)3105     actOnDeviceGlobalVarEntriesInfo(
3106         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3107   // Scan all target region entries and perform the provided action.
3108   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3109     Action(E.getKey(), E.getValue());
3110 }
3111 
/// Emit one offload entry descriptor (ID, name, size, flags) as a global
/// constant struct placed in the 'omp_offloading_entries' section, where the
/// linker collects all entries for the offloading runtime to enumerate.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  // The entry is named after the global it describes.
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field initializers; the layout must match getTgtOffloadEntryQTy():
  // address, name, size, flags, and a reserved zero.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  // Weak linkage so duplicate entries across TUs are merged by the linker.
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3142 
createOffloadEntriesAndInfoMetadata()3143 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3144   // Emit the offloading entries and metadata so that the device codegen side
3145   // can easily figure out what to emit. The produced metadata looks like
3146   // this:
3147   //
3148   // !omp_offload.info = !{!1, ...}
3149   //
3150   // Right now we only generate metadata for function that contain target
3151   // regions.
3152 
3153   // If we are in simd mode or there are no entries, we don't need to do
3154   // anything.
3155   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3156     return;
3157 
3158   llvm::Module &M = CGM.getModule();
3159   llvm::LLVMContext &C = M.getContext();
3160   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3161                          SourceLocation, StringRef>,
3162               16>
3163       OrderedEntries(OffloadEntriesInfoManager.size());
3164   llvm::SmallVector<StringRef, 16> ParentFunctions(
3165       OffloadEntriesInfoManager.size());
3166 
3167   // Auxiliary methods to create metadata values and strings.
3168   auto &&GetMDInt = [this](unsigned V) {
3169     return llvm::ConstantAsMetadata::get(
3170         llvm::ConstantInt::get(CGM.Int32Ty, V));
3171   };
3172 
3173   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3174 
3175   // Create the offloading info metadata node.
3176   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3177 
3178   // Create function that emits metadata for each target region entry;
3179   auto &&TargetRegionMetadataEmitter =
3180       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3181        &GetMDString](
3182           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3183           unsigned Line,
3184           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3185         // Generate metadata for target regions. Each entry of this metadata
3186         // contains:
3187         // - Entry 0 -> Kind of this type of metadata (0).
3188         // - Entry 1 -> Device ID of the file where the entry was identified.
3189         // - Entry 2 -> File ID of the file where the entry was identified.
3190         // - Entry 3 -> Mangled name of the function where the entry was
3191         // identified.
3192         // - Entry 4 -> Line in the file where the entry was identified.
3193         // - Entry 5 -> Order the entry was created.
3194         // The first element of the metadata node is the kind.
3195         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3196                                  GetMDInt(FileID),      GetMDString(ParentName),
3197                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3198 
3199         SourceLocation Loc;
3200         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3201                   E = CGM.getContext().getSourceManager().fileinfo_end();
3202              I != E; ++I) {
3203           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3204               I->getFirst()->getUniqueID().getFile() == FileID) {
3205             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3206                 I->getFirst(), Line, 1);
3207             break;
3208           }
3209         }
3210         // Save this entry in the right position of the ordered entries array.
3211         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3212         ParentFunctions[E.getOrder()] = ParentName;
3213 
3214         // Add metadata to the named metadata node.
3215         MD->addOperand(llvm::MDNode::get(C, Ops));
3216       };
3217 
3218   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3219       TargetRegionMetadataEmitter);
3220 
3221   // Create function that emits metadata for each device global variable entry;
3222   auto &&DeviceGlobalVarMetadataEmitter =
3223       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3224        MD](StringRef MangledName,
3225            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3226                &E) {
3227         // Generate metadata for global variables. Each entry of this metadata
3228         // contains:
3229         // - Entry 0 -> Kind of this type of metadata (1).
3230         // - Entry 1 -> Mangled name of the variable.
3231         // - Entry 2 -> Declare target kind.
3232         // - Entry 3 -> Order the entry was created.
3233         // The first element of the metadata node is the kind.
3234         llvm::Metadata *Ops[] = {
3235             GetMDInt(E.getKind()), GetMDString(MangledName),
3236             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3237 
3238         // Save this entry in the right position of the ordered entries array.
3239         OrderedEntries[E.getOrder()] =
3240             std::make_tuple(&E, SourceLocation(), MangledName);
3241 
3242         // Add metadata to the named metadata node.
3243         MD->addOperand(llvm::MDNode::get(C, Ops));
3244       };
3245 
3246   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3247       DeviceGlobalVarMetadataEmitter);
3248 
3249   for (const auto &E : OrderedEntries) {
3250     assert(std::get<0>(E) && "All ordered entries must exist!");
3251     if (const auto *CE =
3252             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3253                 std::get<0>(E))) {
3254       if (!CE->getID() || !CE->getAddress()) {
3255         // Do not blame the entry if the parent funtion is not emitted.
3256         StringRef FnName = ParentFunctions[CE->getOrder()];
3257         if (!CGM.GetGlobalValue(FnName))
3258           continue;
3259         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3260             DiagnosticsEngine::Error,
3261             "Offloading entry for target region in %0 is incorrect: either the "
3262             "address or the ID is invalid.");
3263         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3264         continue;
3265       }
3266       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3267                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3268     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3269                                              OffloadEntryInfoDeviceGlobalVar>(
3270                    std::get<0>(E))) {
3271       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3272           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3273               CE->getFlags());
3274       switch (Flags) {
3275       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3276         if (CGM.getLangOpts().OpenMPIsDevice &&
3277             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3278           continue;
3279         if (!CE->getAddress()) {
3280           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3281               DiagnosticsEngine::Error, "Offloading entry for declare target "
3282                                         "variable %0 is incorrect: the "
3283                                         "address is invalid.");
3284           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3285           continue;
3286         }
3287         // The vaiable has no definition - no need to add the entry.
3288         if (CE->getVarSize().isZero())
3289           continue;
3290         break;
3291       }
3292       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3293         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3294                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3295                "Declaret target link address is set.");
3296         if (CGM.getLangOpts().OpenMPIsDevice)
3297           continue;
3298         if (!CE->getAddress()) {
3299           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3300               DiagnosticsEngine::Error,
3301               "Offloading entry for declare target variable is incorrect: the "
3302               "address is invalid.");
3303           CGM.getDiags().Report(DiagID);
3304           continue;
3305         }
3306         break;
3307       }
3308       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3309                          CE->getVarSize().getQuantity(), Flags,
3310                          CE->getLinkage());
3311     } else {
3312       llvm_unreachable("Unsupported entry kind.");
3313     }
3314   }
3315 }
3316 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a temporary context; only the named metadata
  // is read from the resulting module. The context must outlive ME.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers decoding the operands written by the metadata emitters.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the layout of the remaining operands
    // depends on it (see createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3385 
emitKmpRoutineEntryT(QualType KmpInt32Ty)3386 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3387   if (!KmpRoutineEntryPtrTy) {
3388     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3389     ASTContext &C = CGM.getContext();
3390     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3391     FunctionProtoType::ExtProtoInfo EPI;
3392     KmpRoutineEntryPtrQTy = C.getPointerType(
3393         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3394     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3395   }
3396 }
3397 
getTgtOffloadEntryQTy()3398 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3399   // Make sure the type of the entry is already created. This is the type we
3400   // have to create:
3401   // struct __tgt_offload_entry{
3402   //   void      *addr;       // Pointer to the offload entry info.
3403   //                          // (function or global)
3404   //   char      *name;       // Name of the function or global.
3405   //   size_t     size;       // Size of the entry info (0 if it a function).
3406   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3407   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3408   // };
3409   if (TgtOffloadEntryQTy.isNull()) {
3410     ASTContext &C = CGM.getContext();
3411     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3412     RD->startDefinition();
3413     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3414     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3415     addFieldToRecordDecl(C, RD, C.getSizeType());
3416     addFieldToRecordDecl(
3417         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3418     addFieldToRecordDecl(
3419         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3420     RD->completeDefinition();
3421     RD->addAttr(PackedAttr::CreateImplicit(C));
3422     TgtOffloadEntryQTy = C.getRecordType(RD);
3423   }
3424   return TgtOffloadEntryQTy;
3425 }
3426 
3427 namespace {
3428 struct PrivateHelpersTy {
PrivateHelpersTy__anone1a752751611::PrivateHelpersTy3429   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3430                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3431       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3432         PrivateElemInit(PrivateElemInit) {}
PrivateHelpersTy__anone1a752751611::PrivateHelpersTy3433   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3434   const Expr *OriginalRef = nullptr;
3435   const VarDecl *Original = nullptr;
3436   const VarDecl *PrivateCopy = nullptr;
3437   const VarDecl *PrivateElemInit = nullptr;
isLocalPrivate__anone1a752751611::PrivateHelpersTy3438   bool isLocalPrivate() const {
3439     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3440   }
3441 };
3442 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3443 } // anonymous namespace
3444 
isAllocatableDecl(const VarDecl * VD)3445 static bool isAllocatableDecl(const VarDecl *VD) {
3446   const VarDecl *CVD = VD->getCanonicalDecl();
3447   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3448     return false;
3449   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3450   // Use the default allocation.
3451   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3452             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3453            !AA->getAllocator());
3454 }
3455 
3456 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)3457 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3458   if (!Privates.empty()) {
3459     ASTContext &C = CGM.getContext();
3460     // Build struct .kmp_privates_t. {
3461     //         /*  private vars  */
3462     //       };
3463     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3464     RD->startDefinition();
3465     for (const auto &Pair : Privates) {
3466       const VarDecl *VD = Pair.second.Original;
3467       QualType Type = VD->getType().getNonReferenceType();
3468       // If the private variable is a local variable with lvalue ref type,
3469       // allocate the pointer instead of the pointee type.
3470       if (Pair.second.isLocalPrivate()) {
3471         if (VD->getType()->isLValueReferenceType())
3472           Type = C.getPointerType(Type);
3473         if (isAllocatableDecl(VD))
3474           Type = C.getPointerType(Type);
3475       }
3476       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3477       if (VD->hasAttrs()) {
3478         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3479              E(VD->getAttrs().end());
3480              I != E; ++I)
3481           FD->addAttr(*I);
3482       }
3483     }
3484     RD->completeDefinition();
3485     return RD;
3486   }
3487   return nullptr;
3488 }
3489 
3490 static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule & CGM,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)3491 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3492                          QualType KmpInt32Ty,
3493                          QualType KmpRoutineEntryPointerQTy) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t {
3496   //         void *              shareds;
3497   //         kmp_routine_entry_t routine;
3498   //         kmp_int32           part_id;
3499   //         kmp_cmplrdata_t data1;
3500   //         kmp_cmplrdata_t data2;
3501   // For taskloops additional fields:
3502   //         kmp_uint64          lb;
3503   //         kmp_uint64          ub;
3504   //         kmp_int64           st;
3505   //         kmp_int32           liter;
3506   //         void *              reductions;
3507   //       };
3508   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3509   UD->startDefinition();
3510   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3511   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3512   UD->completeDefinition();
3513   QualType KmpCmplrdataTy = C.getRecordType(UD);
3514   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3515   RD->startDefinition();
3516   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3517   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3518   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3519   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3521   if (isOpenMPTaskLoopDirective(Kind)) {
3522     QualType KmpUInt64Ty =
3523         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3524     QualType KmpInt64Ty =
3525         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3526     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3528     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3529     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3530     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3531   }
3532   RD->completeDefinition();
3533   return RD;
3534 }
3535 
3536 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)3537 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3538                                      ArrayRef<PrivateDataTy> Privates) {
3539   ASTContext &C = CGM.getContext();
3540   // Build struct kmp_task_t_with_privates {
3541   //         kmp_task_t task_data;
3542   //         .kmp_privates_t. privates;
3543   //       };
3544   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3545   RD->startDefinition();
3546   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3547   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3548     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3549   RD->completeDefinition();
3550   return RD;
3551 }
3552 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The proxy's two parameters: the global thread id and the task descriptor.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the embedded kmp_task_t (the wrapper record's first field).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address so the callee can update it.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates block, or null when there is none (the
  // wrapper record then has only the task_data field).
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive the bounds, stride, last-iteration
  // flag and reductions loaded from the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The task entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3667 
/// Emit a function that destroys the non-trivially-destructible fields of the
/// privates block of a kmp_task_t_with_privates instance. It has the same
/// signature shape as the task entry:
///   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt);
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: global thread id and the task descriptor pointer.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Skip the task_data field; FI points at the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every privates field that needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3716 
3717 /// Emit a privates mapping function for correct handling of private and
3718 /// firstprivate variables.
3719 /// \code
3720 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3721 /// **noalias priv1,...,  <tyn> **noalias privn) {
3722 ///   *priv1 = &.privates.priv1;
3723 ///   ...;
3724 ///   *privn = &.privates.privn;
3725 /// }
3726 /// \endcode
3727 static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule & CGM,SourceLocation Loc,const OMPTaskDataTy & Data,QualType PrivatesQTy,ArrayRef<PrivateDataTy> Privates)3728 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3729                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3730                                ArrayRef<PrivateDataTy> Privates) {
3731   ASTContext &C = CGM.getContext();
3732   FunctionArgList Args;
3733   ImplicitParamDecl TaskPrivatesArg(
3734       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3735       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3736       ImplicitParamDecl::Other);
3737   Args.push_back(&TaskPrivatesArg);
3738   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3739   unsigned Counter = 1;
3740   for (const Expr *E : Data.PrivateVars) {
3741     Args.push_back(ImplicitParamDecl::Create(
3742         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3743         C.getPointerType(C.getPointerType(E->getType()))
3744             .withConst()
3745             .withRestrict(),
3746         ImplicitParamDecl::Other));
3747     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3748     PrivateVarsPos[VD] = Counter;
3749     ++Counter;
3750   }
3751   for (const Expr *E : Data.FirstprivateVars) {
3752     Args.push_back(ImplicitParamDecl::Create(
3753         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3754         C.getPointerType(C.getPointerType(E->getType()))
3755             .withConst()
3756             .withRestrict(),
3757         ImplicitParamDecl::Other));
3758     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3759     PrivateVarsPos[VD] = Counter;
3760     ++Counter;
3761   }
3762   for (const Expr *E : Data.LastprivateVars) {
3763     Args.push_back(ImplicitParamDecl::Create(
3764         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765         C.getPointerType(C.getPointerType(E->getType()))
3766             .withConst()
3767             .withRestrict(),
3768         ImplicitParamDecl::Other));
3769     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770     PrivateVarsPos[VD] = Counter;
3771     ++Counter;
3772   }
3773   for (const VarDecl *VD : Data.PrivateLocals) {
3774     QualType Ty = VD->getType().getNonReferenceType();
3775     if (VD->getType()->isLValueReferenceType())
3776       Ty = C.getPointerType(Ty);
3777     if (isAllocatableDecl(VD))
3778       Ty = C.getPointerType(Ty);
3779     Args.push_back(ImplicitParamDecl::Create(
3780         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3782         ImplicitParamDecl::Other));
3783     PrivateVarsPos[VD] = Counter;
3784     ++Counter;
3785   }
3786   const auto &TaskPrivatesMapFnInfo =
3787       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3788   llvm::FunctionType *TaskPrivatesMapTy =
3789       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3790   std::string Name =
3791       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3792   auto *TaskPrivatesMap = llvm::Function::Create(
3793       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3794       &CGM.getModule());
3795   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3796                                     TaskPrivatesMapFnInfo);
3797   if (CGM.getLangOpts().Optimize) {
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3799     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3800     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3801   }
3802   CodeGenFunction CGF(CGM);
3803   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3804                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3805 
3806   // *privi = &.privates.privi;
3807   LValue Base = CGF.EmitLoadOfPointerLValue(
3808       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3809       TaskPrivatesArg.getType()->castAs<PointerType>());
3810   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3811   Counter = 0;
3812   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3813     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3814     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3815     LValue RefLVal =
3816         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3817     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3818         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3819     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3820     ++Counter;
3821   }
3822   CGF.FinishFunction();
3823   return TaskPrivatesMap;
3824 }
3825 
3826 /// Emit initialization for private variables in task-based directives.
emitPrivatesInit(CodeGenFunction & CGF,const OMPExecutableDirective & D,Address KmpTaskSharedsPtr,LValue TDBase,const RecordDecl * KmpTaskTWithPrivatesQTyRD,QualType SharedsTy,QualType SharedsPtrTy,const OMPTaskDataTy & Data,ArrayRef<PrivateDataTy> Privates,bool ForDup)3827 static void emitPrivatesInit(CodeGenFunction &CGF,
3828                              const OMPExecutableDirective &D,
3829                              Address KmpTaskSharedsPtr, LValue TDBase,
3830                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3831                              QualType SharedsTy, QualType SharedsPtrTy,
3832                              const OMPTaskDataTy &Data,
3833                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3834   ASTContext &C = CGF.getContext();
3835   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3836   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3837   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3838                                  ? OMPD_taskloop
3839                                  : OMPD_task;
3840   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3841   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3842   LValue SrcBase;
3843   bool IsTargetTask =
3844       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3845       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3846   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3847   // PointersArray, SizesArray, and MappersArray. The original variables for
3848   // these arrays are not captured and we get their addresses explicitly.
3849   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3850       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3851     SrcBase = CGF.MakeAddrLValue(
3852         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3853             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3854         SharedsTy);
3855   }
3856   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3857   for (const PrivateDataTy &Pair : Privates) {
3858     // Do not initialize private locals.
3859     if (Pair.second.isLocalPrivate()) {
3860       ++FI;
3861       continue;
3862     }
3863     const VarDecl *VD = Pair.second.PrivateCopy;
3864     const Expr *Init = VD->getAnyInitializer();
3865     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3866                              !CGF.isTrivialInitializer(Init)))) {
3867       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3868       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3869         const VarDecl *OriginalVD = Pair.second.Original;
3870         // Check if the variable is the target-based BasePointersArray,
3871         // PointersArray, SizesArray, or MappersArray.
3872         LValue SharedRefLValue;
3873         QualType Type = PrivateLValue.getType();
3874         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3875         if (IsTargetTask && !SharedField) {
3876           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3877                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3878                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3879                          ->getNumParams() == 0 &&
3880                  isa<TranslationUnitDecl>(
3881                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3882                          ->getDeclContext()) &&
3883                  "Expected artificial target data variable.");
3884           SharedRefLValue =
3885               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3886         } else if (ForDup) {
3887           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3888           SharedRefLValue = CGF.MakeAddrLValue(
3889               Address(SharedRefLValue.getPointer(CGF),
3890                       C.getDeclAlign(OriginalVD)),
3891               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3892               SharedRefLValue.getTBAAInfo());
3893         } else if (CGF.LambdaCaptureFields.count(
3894                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3895                    dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3896           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3897         } else {
3898           // Processing for implicitly captured variables.
3899           InlinedOpenMPRegionRAII Region(
3900               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3901               /*HasCancel=*/false, /*NoInheritance=*/true);
3902           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3903         }
3904         if (Type->isArrayType()) {
3905           // Initialize firstprivate array.
3906           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3907             // Perform simple memcpy.
3908             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3909           } else {
3910             // Initialize firstprivate array using element-by-element
3911             // initialization.
3912             CGF.EmitOMPAggregateAssign(
3913                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3914                 Type,
3915                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3916                                                   Address SrcElement) {
3917                   // Clean up any temporaries needed by the initialization.
3918                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3919                   InitScope.addPrivate(
3920                       Elem, [SrcElement]() -> Address { return SrcElement; });
3921                   (void)InitScope.Privatize();
3922                   // Emit initialization for single element.
3923                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3924                       CGF, &CapturesInfo);
3925                   CGF.EmitAnyExprToMem(Init, DestElement,
3926                                        Init->getType().getQualifiers(),
3927                                        /*IsInitializer=*/false);
3928                 });
3929           }
3930         } else {
3931           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3932           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3933             return SharedRefLValue.getAddress(CGF);
3934           });
3935           (void)InitScope.Privatize();
3936           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3937           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3938                              /*capturedByInit=*/false);
3939         }
3940       } else {
3941         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3942       }
3943     }
3944     ++FI;
3945   }
3946 }
3947 
3948 /// Check if duplication function is required for taskloops.
checkInitIsRequired(CodeGenFunction & CGF,ArrayRef<PrivateDataTy> Privates)3949 static bool checkInitIsRequired(CodeGenFunction &CGF,
3950                                 ArrayRef<PrivateDataTy> Privates) {
3951   bool InitRequired = false;
3952   for (const PrivateDataTy &Pair : Privates) {
3953     if (Pair.second.isLocalPrivate())
3954       continue;
3955     const VarDecl *VD = Pair.second.PrivateCopy;
3956     const Expr *Init = VD->getAnyInitializer();
3957     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3958                                     !CGF.isTrivialInitializer(Init));
3959     if (InitRequired)
3960       break;
3961   }
3962   return InitRequired;
3963 }
3964 
3965 
3966 /// Emit task_dup function (for initialization of
3967 /// private/firstprivate/lastprivate vars and last_iter flag)
3968 /// \code
3969 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3970 /// lastpriv) {
3971 /// // setup lastprivate flag
3972 ///    task_dst->last = lastpriv;
3973 /// // could be constructor calls here...
3974 /// }
3975 /// \endcode
3976 static llvm::Value *
emitTaskDupFunction(CodeGenModule & CGM,SourceLocation Loc,const OMPExecutableDirective & D,QualType KmpTaskTWithPrivatesPtrQTy,const RecordDecl * KmpTaskTWithPrivatesQTyRD,const RecordDecl * KmpTaskTQTyRD,QualType SharedsTy,QualType SharedsPtrTy,const OMPTaskDataTy & Data,ArrayRef<PrivateDataTy> Privates,bool WithLastIter)3977 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3978                     const OMPExecutableDirective &D,
3979                     QualType KmpTaskTWithPrivatesPtrQTy,
3980                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3981                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3982                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3983                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3984   ASTContext &C = CGM.getContext();
3985   FunctionArgList Args;
3986   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3987                            KmpTaskTWithPrivatesPtrQTy,
3988                            ImplicitParamDecl::Other);
3989   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3990                            KmpTaskTWithPrivatesPtrQTy,
3991                            ImplicitParamDecl::Other);
3992   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3993                                 ImplicitParamDecl::Other);
3994   Args.push_back(&DstArg);
3995   Args.push_back(&SrcArg);
3996   Args.push_back(&LastprivArg);
3997   const auto &TaskDupFnInfo =
3998       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3999   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4000   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4001   auto *TaskDup = llvm::Function::Create(
4002       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4003   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4004   TaskDup->setDoesNotRecurse();
4005   CodeGenFunction CGF(CGM);
4006   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4007                     Loc);
4008 
4009   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4010       CGF.GetAddrOfLocalVar(&DstArg),
4011       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4012   // task_dst->liter = lastpriv;
4013   if (WithLastIter) {
4014     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4015     LValue Base = CGF.EmitLValueForField(
4016         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4017     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4018     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4019         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4020     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4021   }
4022 
4023   // Emit initial values for private copies (if any).
4024   assert(!Privates.empty());
4025   Address KmpTaskSharedsPtr = Address::invalid();
4026   if (!Data.FirstprivateVars.empty()) {
4027     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4028         CGF.GetAddrOfLocalVar(&SrcArg),
4029         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4030     LValue Base = CGF.EmitLValueForField(
4031         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4032     KmpTaskSharedsPtr = Address(
4033         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4034                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4035                                                   KmpTaskTShareds)),
4036                              Loc),
4037         CGM.getNaturalTypeAlignment(SharedsTy));
4038   }
4039   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4040                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4041   CGF.FinishFunction();
4042   return TaskDup;
4043 }
4044 
4045 /// Checks if destructor function is required to be generated.
4046 /// \return true if cleanups are required, false otherwise.
4047 static bool
checkDestructorsRequired(const RecordDecl * KmpTaskTWithPrivatesQTyRD,ArrayRef<PrivateDataTy> Privates)4048 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4049                          ArrayRef<PrivateDataTy> Privates) {
4050   for (const PrivateDataTy &P : Privates) {
4051     if (P.second.isLocalPrivate())
4052       continue;
4053     QualType Ty = P.second.Original->getType().getNonReferenceType();
4054     if (Ty.isDestructedType())
4055       return true;
4056   }
4057   return false;
4058 }
4059 
4060 namespace {
4061 /// Loop generator for OpenMP iterator expression.
4062 class OMPIteratorGeneratorScope final
4063     : public CodeGenFunction::OMPPrivateScope {
4064   CodeGenFunction &CGF;
4065   const OMPIteratorExpr *E = nullptr;
4066   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4067   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4068   OMPIteratorGeneratorScope() = delete;
4069   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4070 
4071 public:
OMPIteratorGeneratorScope(CodeGenFunction & CGF,const OMPIteratorExpr * E)4072   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4073       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4074     if (!E)
4075       return;
4076     SmallVector<llvm::Value *, 4> Uppers;
4077     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4078       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4079       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4080       addPrivate(VD, [&CGF, VD]() {
4081         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4082       });
4083       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4084       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4085         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4086                                  "counter.addr");
4087       });
4088     }
4089     Privatize();
4090 
4091     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4092       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4093       LValue CLVal =
4094           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4095                              HelperData.CounterVD->getType());
4096       // Counter = 0;
4097       CGF.EmitStoreOfScalar(
4098           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4099           CLVal);
4100       CodeGenFunction::JumpDest &ContDest =
4101           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4102       CodeGenFunction::JumpDest &ExitDest =
4103           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4104       // N = <number-of_iterations>;
4105       llvm::Value *N = Uppers[I];
4106       // cont:
4107       // if (Counter < N) goto body; else goto exit;
4108       CGF.EmitBlock(ContDest.getBlock());
4109       auto *CVal =
4110           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4111       llvm::Value *Cmp =
4112           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4113               ? CGF.Builder.CreateICmpSLT(CVal, N)
4114               : CGF.Builder.CreateICmpULT(CVal, N);
4115       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4116       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4117       // body:
4118       CGF.EmitBlock(BodyBB);
4119       // Iteri = Begini + Counter * Stepi;
4120       CGF.EmitIgnoredExpr(HelperData.Update);
4121     }
4122   }
~OMPIteratorGeneratorScope()4123   ~OMPIteratorGeneratorScope() {
4124     if (!E)
4125       return;
4126     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4127       // Counter = Counter + 1;
4128       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4129       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4130       // goto cont;
4131       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4132       // exit:
4133       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4134     }
4135   }
4136 };
4137 } // namespace
4138 
4139 static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction & CGF,const Expr * E)4140 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4141   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4142   llvm::Value *Addr;
4143   if (OASE) {
4144     const Expr *Base = OASE->getBase();
4145     Addr = CGF.EmitScalarExpr(Base);
4146   } else {
4147     Addr = CGF.EmitLValue(E).getPointer(CGF);
4148   }
4149   llvm::Value *SizeVal;
4150   QualType Ty = E->getType();
4151   if (OASE) {
4152     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4153     for (const Expr *SE : OASE->getDimensions()) {
4154       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4155       Sz = CGF.EmitScalarConversion(
4156           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4157       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4158     }
4159   } else if (const auto *ASE =
4160                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4161     LValue UpAddrLVal =
4162         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4163     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4164     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4165         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4166     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4167     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4168     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4169   } else {
4170     SizeVal = CGF.getTypeSize(Ty);
4171   }
4172   return std::make_pair(Addr, SizeVal);
4173 }
4174 
4175 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getKmpAffinityType(ASTContext & C,QualType & KmpTaskAffinityInfoTy)4176 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4177   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4178   if (KmpTaskAffinityInfoTy.isNull()) {
4179     RecordDecl *KmpAffinityInfoRD =
4180         C.buildImplicitRecord("kmp_task_affinity_info_t");
4181     KmpAffinityInfoRD->startDefinition();
4182     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4183     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4184     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4185     KmpAffinityInfoRD->completeDefinition();
4186     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4187   }
4188 }
4189 
4190 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)4191 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4192                               const OMPExecutableDirective &D,
4193                               llvm::Function *TaskFunction, QualType SharedsTy,
4194                               Address Shareds, const OMPTaskDataTy &Data) {
4195   ASTContext &C = CGM.getContext();
4196   llvm::SmallVector<PrivateDataTy, 4> Privates;
4197   // Aggregate privates and sort them by the alignment.
4198   const auto *I = Data.PrivateCopies.begin();
4199   for (const Expr *E : Data.PrivateVars) {
4200     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4201     Privates.emplace_back(
4202         C.getDeclAlign(VD),
4203         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4204                          /*PrivateElemInit=*/nullptr));
4205     ++I;
4206   }
4207   I = Data.FirstprivateCopies.begin();
4208   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4209   for (const Expr *E : Data.FirstprivateVars) {
4210     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4211     Privates.emplace_back(
4212         C.getDeclAlign(VD),
4213         PrivateHelpersTy(
4214             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4215             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4216     ++I;
4217     ++IElemInitRef;
4218   }
4219   I = Data.LastprivateCopies.begin();
4220   for (const Expr *E : Data.LastprivateVars) {
4221     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4222     Privates.emplace_back(
4223         C.getDeclAlign(VD),
4224         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4225                          /*PrivateElemInit=*/nullptr));
4226     ++I;
4227   }
4228   for (const VarDecl *VD : Data.PrivateLocals) {
4229     if (isAllocatableDecl(VD))
4230       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4231     else
4232       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4233   }
4234   llvm::stable_sort(Privates,
4235                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4236                       return L.first > R.first;
4237                     });
4238   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4239   // Build type kmp_routine_entry_t (if not built yet).
4240   emitKmpRoutineEntryT(KmpInt32Ty);
4241   // Build type kmp_task_t (if not built yet).
4242   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4243     if (SavedKmpTaskloopTQTy.isNull()) {
4244       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4245           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4246     }
4247     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4248   } else {
4249     assert((D.getDirectiveKind() == OMPD_task ||
4250             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4251             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4252            "Expected taskloop, task or target directive");
4253     if (SavedKmpTaskTQTy.isNull()) {
4254       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4255           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4256     }
4257     KmpTaskTQTy = SavedKmpTaskTQTy;
4258   }
4259   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4260   // Build particular struct kmp_task_t for the given task.
4261   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4262       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4263   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4264   QualType KmpTaskTWithPrivatesPtrQTy =
4265       C.getPointerType(KmpTaskTWithPrivatesQTy);
4266   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4267   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4268       KmpTaskTWithPrivatesTy->getPointerTo();
4269   llvm::Value *KmpTaskTWithPrivatesTySize =
4270       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4271   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4272 
4273   // Emit initial values for private copies (if any).
4274   llvm::Value *TaskPrivatesMap = nullptr;
4275   llvm::Type *TaskPrivatesMapTy =
4276       std::next(TaskFunction->arg_begin(), 3)->getType();
4277   if (!Privates.empty()) {
4278     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4279     TaskPrivatesMap =
4280         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4281     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4282         TaskPrivatesMap, TaskPrivatesMapTy);
4283   } else {
4284     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4285         cast<llvm::PointerType>(TaskPrivatesMapTy));
4286   }
4287   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4288   // kmp_task_t *tt);
4289   llvm::Function *TaskEntry = emitProxyTaskFunction(
4290       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4291       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4292       TaskPrivatesMap);
4293 
4294   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4295   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4296   // kmp_routine_entry_t *task_entry);
4297   // Task flags. Format is taken from
4298   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4299   // description of kmp_tasking_flags struct.
4300   enum {
4301     TiedFlag = 0x1,
4302     FinalFlag = 0x2,
4303     DestructorsFlag = 0x8,
4304     PriorityFlag = 0x20,
4305     DetachableFlag = 0x40,
4306   };
4307   unsigned Flags = Data.Tied ? TiedFlag : 0;
4308   bool NeedsCleanup = false;
4309   if (!Privates.empty()) {
4310     NeedsCleanup =
4311         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4312     if (NeedsCleanup)
4313       Flags = Flags | DestructorsFlag;
4314   }
4315   if (Data.Priority.getInt())
4316     Flags = Flags | PriorityFlag;
4317   if (D.hasClausesOfKind<OMPDetachClause>())
4318     Flags = Flags | DetachableFlag;
4319   llvm::Value *TaskFlags =
4320       Data.Final.getPointer()
4321           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4322                                      CGF.Builder.getInt32(FinalFlag),
4323                                      CGF.Builder.getInt32(/*C=*/0))
4324           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4325   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4326   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4327   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4328       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4329       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4330           TaskEntry, KmpRoutineEntryPtrTy)};
4331   llvm::Value *NewTask;
4332   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4333     // Check if we have any device clause associated with the directive.
4334     const Expr *Device = nullptr;
4335     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4336       Device = C->getDevice();
4337     // Emit device ID if any otherwise use default value.
4338     llvm::Value *DeviceID;
4339     if (Device)
4340       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4341                                            CGF.Int64Ty, /*isSigned=*/true);
4342     else
4343       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4344     AllocArgs.push_back(DeviceID);
4345     NewTask = CGF.EmitRuntimeCall(
4346         OMPBuilder.getOrCreateRuntimeFunction(
4347             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4348         AllocArgs);
4349   } else {
4350     NewTask =
4351         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4352                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4353                             AllocArgs);
4354   }
4355   // Emit detach clause initialization.
4356   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4357   // task_descriptor);
4358   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4359     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4360     LValue EvtLVal = CGF.EmitLValue(Evt);
4361 
4362     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4363     // int gtid, kmp_task_t *task);
4364     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4365     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4366     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4367     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4368         OMPBuilder.getOrCreateRuntimeFunction(
4369             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4370         {Loc, Tid, NewTask});
4371     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4372                                       Evt->getExprLoc());
4373     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4374   }
4375   // Process affinity clauses.
4376   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4377     // Process list of affinity data.
4378     ASTContext &C = CGM.getContext();
4379     Address AffinitiesArray = Address::invalid();
4380     // Calculate number of elements to form the array of affinity data.
4381     llvm::Value *NumOfElements = nullptr;
4382     unsigned NumAffinities = 0;
4383     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4384       if (const Expr *Modifier = C->getModifier()) {
4385         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4386         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4387           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4388           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4389           NumOfElements =
4390               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4391         }
4392       } else {
4393         NumAffinities += C->varlist_size();
4394       }
4395     }
4396     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4397     // Fields ids in kmp_task_affinity_info record.
4398     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4399 
4400     QualType KmpTaskAffinityInfoArrayTy;
4401     if (NumOfElements) {
4402       NumOfElements = CGF.Builder.CreateNUWAdd(
4403           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4404       OpaqueValueExpr OVE(
4405           Loc,
4406           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4407           VK_PRValue);
4408       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4409                                                     RValue::get(NumOfElements));
4410       KmpTaskAffinityInfoArrayTy =
4411           C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4412                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4413       // Properly emit variable-sized array.
4414       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4415                                            ImplicitParamDecl::Other);
4416       CGF.EmitVarDecl(*PD);
4417       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4418       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4419                                                 /*isSigned=*/false);
4420     } else {
4421       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4422           KmpTaskAffinityInfoTy,
4423           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4424           ArrayType::Normal, /*IndexTypeQuals=*/0);
4425       AffinitiesArray =
4426           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4427       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4428       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4429                                              /*isSigned=*/false);
4430     }
4431 
4432     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4433     // Fill array by elements without iterators.
4434     unsigned Pos = 0;
4435     bool HasIterator = false;
4436     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4437       if (C->getModifier()) {
4438         HasIterator = true;
4439         continue;
4440       }
4441       for (const Expr *E : C->varlists()) {
4442         llvm::Value *Addr;
4443         llvm::Value *Size;
4444         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4445         LValue Base =
4446             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4447                                KmpTaskAffinityInfoTy);
4448         // affs[i].base_addr = &<Affinities[i].second>;
4449         LValue BaseAddrLVal = CGF.EmitLValueForField(
4450             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4451         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4452                               BaseAddrLVal);
4453         // affs[i].len = sizeof(<Affinities[i].second>);
4454         LValue LenLVal = CGF.EmitLValueForField(
4455             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4456         CGF.EmitStoreOfScalar(Size, LenLVal);
4457         ++Pos;
4458       }
4459     }
4460     LValue PosLVal;
4461     if (HasIterator) {
4462       PosLVal = CGF.MakeAddrLValue(
4463           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4464           C.getSizeType());
4465       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4466     }
4467     // Process elements with iterators.
4468     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4469       const Expr *Modifier = C->getModifier();
4470       if (!Modifier)
4471         continue;
4472       OMPIteratorGeneratorScope IteratorScope(
4473           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4474       for (const Expr *E : C->varlists()) {
4475         llvm::Value *Addr;
4476         llvm::Value *Size;
4477         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4478         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4479         LValue Base = CGF.MakeAddrLValue(
4480             Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
4481                                           AffinitiesArray.getPointer(), Idx),
4482                     AffinitiesArray.getAlignment()),
4483             KmpTaskAffinityInfoTy);
4484         // affs[i].base_addr = &<Affinities[i].second>;
4485         LValue BaseAddrLVal = CGF.EmitLValueForField(
4486             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4487         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4488                               BaseAddrLVal);
4489         // affs[i].len = sizeof(<Affinities[i].second>);
4490         LValue LenLVal = CGF.EmitLValueForField(
4491             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4492         CGF.EmitStoreOfScalar(Size, LenLVal);
4493         Idx = CGF.Builder.CreateNUWAdd(
4494             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4495         CGF.EmitStoreOfScalar(Idx, PosLVal);
4496       }
4497     }
4498     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4499     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4500     // naffins, kmp_task_affinity_info_t *affin_list);
4501     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4502     llvm::Value *GTid = getThreadID(CGF, Loc);
4503     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4504         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4505     // FIXME: Emit the function and ignore its result for now unless the
4506     // runtime function is properly implemented.
4507     (void)CGF.EmitRuntimeCall(
4508         OMPBuilder.getOrCreateRuntimeFunction(
4509             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4510         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4511   }
4512   llvm::Value *NewTaskNewTaskTTy =
4513       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4514           NewTask, KmpTaskTWithPrivatesPtrTy);
4515   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4516                                                KmpTaskTWithPrivatesQTy);
4517   LValue TDBase =
4518       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4519   // Fill the data in the resulting kmp_task_t record.
4520   // Copy shareds if there are any.
4521   Address KmpTaskSharedsPtr = Address::invalid();
4522   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4523     KmpTaskSharedsPtr =
4524         Address(CGF.EmitLoadOfScalar(
4525                     CGF.EmitLValueForField(
4526                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4527                                            KmpTaskTShareds)),
4528                     Loc),
4529                 CGM.getNaturalTypeAlignment(SharedsTy));
4530     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4531     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4532     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4533   }
4534   // Emit initial values for private copies (if any).
4535   TaskResultTy Result;
4536   if (!Privates.empty()) {
4537     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4538                      SharedsTy, SharedsPtrTy, Data, Privates,
4539                      /*ForDup=*/false);
4540     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4541         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4542       Result.TaskDupFn = emitTaskDupFunction(
4543           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4544           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4545           /*WithLastIter=*/!Data.LastprivateVars.empty());
4546     }
4547   }
4548   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4549   enum { Priority = 0, Destructors = 1 };
4550   // Provide pointer to function with destructors for privates.
4551   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4552   const RecordDecl *KmpCmplrdataUD =
4553       (*FI)->getType()->getAsUnionType()->getDecl();
4554   if (NeedsCleanup) {
4555     llvm::Value *DestructorFn = emitDestructorsFunction(
4556         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4557         KmpTaskTWithPrivatesQTy);
4558     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4559     LValue DestructorsLV = CGF.EmitLValueForField(
4560         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4561     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4562                               DestructorFn, KmpRoutineEntryPtrTy),
4563                           DestructorsLV);
4564   }
4565   // Set priority.
4566   if (Data.Priority.getInt()) {
4567     LValue Data2LV = CGF.EmitLValueForField(
4568         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4569     LValue PriorityLV = CGF.EmitLValueForField(
4570         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4571     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4572   }
4573   Result.NewTask = NewTask;
4574   Result.TaskEntry = TaskEntry;
4575   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4576   Result.TDBase = TDBase;
4577   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4578   return Result;
4579 }
4580 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these numeric values must stay in sync with the dependence
/// flag encoding expected by the OpenMP runtime's kmp_depend_info record —
/// verify against the runtime's kmp.h before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// The order must match the fields added in getDependTypes():
/// base_addr (intptr_t), len (size_t), flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4591 
4592 /// Translates internal dependency kind into the runtime kind.
translateDependencyKind(OpenMPDependClauseKind K)4593 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4594   RTLDependenceKindTy DepKind;
4595   switch (K) {
4596   case OMPC_DEPEND_in:
4597     DepKind = DepIn;
4598     break;
4599   // Out and InOut dependencies must use the same code.
4600   case OMPC_DEPEND_out:
4601   case OMPC_DEPEND_inout:
4602     DepKind = DepInOut;
4603     break;
4604   case OMPC_DEPEND_mutexinoutset:
4605     DepKind = DepMutexInOutSet;
4606     break;
4607   case OMPC_DEPEND_source:
4608   case OMPC_DEPEND_sink:
4609   case OMPC_DEPEND_depobj:
4610   case OMPC_DEPEND_unknown:
4611     llvm_unreachable("Unknown task dependence type");
4612   }
4613   return DepKind;
4614 }
4615 
4616 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getDependTypes(ASTContext & C,QualType & KmpDependInfoTy,QualType & FlagsTy)4617 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4618                            QualType &FlagsTy) {
4619   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4620   if (KmpDependInfoTy.isNull()) {
4621     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4622     KmpDependInfoRD->startDefinition();
4623     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4624     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4625     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4626     KmpDependInfoRD->completeDefinition();
4627     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4628   }
4629 }
4630 
/// Returns the number of dependencies stored in a depobj and an lvalue for
/// the first dependency element.
///
/// A depobj variable holds a void* that points at element 1 of a
/// kmp_depend_info array; element -1 of that pointer (the allocation's first
/// element) stores the dependency count in its base_addr field (see
/// emitDepobjDependClause, which sets up this layout).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the void* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret it as a kmp_depend_info*.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step one element backwards to reach the record that holds the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4659 
emitDependData(CodeGenFunction & CGF,QualType & KmpDependInfoTy,llvm::PointerUnion<unsigned *,LValue * > Pos,const OMPTaskDataTy::DependData & Data,Address DependenciesArray)4660 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4661                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4662                            const OMPTaskDataTy::DependData &Data,
4663                            Address DependenciesArray) {
4664   CodeGenModule &CGM = CGF.CGM;
4665   ASTContext &C = CGM.getContext();
4666   QualType FlagsTy;
4667   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4668   RecordDecl *KmpDependInfoRD =
4669       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4670   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4671 
4672   OMPIteratorGeneratorScope IteratorScope(
4673       CGF, cast_or_null<OMPIteratorExpr>(
4674                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4675                                  : nullptr));
4676   for (const Expr *E : Data.DepExprs) {
4677     llvm::Value *Addr;
4678     llvm::Value *Size;
4679     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4680     LValue Base;
4681     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4682       Base = CGF.MakeAddrLValue(
4683           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4684     } else {
4685       LValue &PosLVal = *Pos.get<LValue *>();
4686       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4687       Base = CGF.MakeAddrLValue(
4688           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4689                                         DependenciesArray.getPointer(), Idx),
4690                   DependenciesArray.getAlignment()),
4691           KmpDependInfoTy);
4692     }
4693     // deps[i].base_addr = &<Dependencies[i].second>;
4694     LValue BaseAddrLVal = CGF.EmitLValueForField(
4695         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4696     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4697                           BaseAddrLVal);
4698     // deps[i].len = sizeof(<Dependencies[i].second>);
4699     LValue LenLVal = CGF.EmitLValueForField(
4700         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4701     CGF.EmitStoreOfScalar(Size, LenLVal);
4702     // deps[i].flags = <Dependencies[i].first>;
4703     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4704     LValue FlagsLVal = CGF.EmitLValueForField(
4705         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4706     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4707                           FlagsLVal);
4708     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4709       ++(*P);
4710     } else {
4711       LValue &PosLVal = *Pos.get<LValue *>();
4712       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4713       Idx = CGF.Builder.CreateNUWAdd(Idx,
4714                                      llvm::ConstantInt::get(Idx->getType(), 1));
4715       CGF.EmitStoreOfScalar(Idx, PosLVal);
4716     }
4717   }
4718 }
4719 
/// Computes, for each depobj expression in \p Data, the number of
/// kmp_depend_info elements stored in that depobj, as runtime values.
/// The returned vector has one entry per expression in Data.DepExprs,
/// in order.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Iterator variables (if any) must be live while the depobj expressions
    // are evaluated, hence the explicit scope.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj's stored pointer and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Element -1 of the depobj array stores the dependency count in its
      // base_addr field (layout set up by emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a fresh zero-initialized temporary; the
      // temporary is read back after the iterator scope ends.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Reload each per-expression size outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4777 
/// Copies the kmp_depend_info records stored in each depobj expression of
/// \p Data into \p DependenciesArray, starting at the runtime position held
/// in \p PosLVal, and advances that position by the number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Iterator variables (if any) must be live while the depobj expressions
    // are evaluated, hence the explicit scope.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj's stored pointer and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // Element -1 of the depobj array stores the dependency count in its
      // base_addr field (layout set up by emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4839 
emitDependClause(CodeGenFunction & CGF,ArrayRef<OMPTaskDataTy::DependData> Dependencies,SourceLocation Loc)4840 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4841     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4842     SourceLocation Loc) {
4843   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4844         return D.DepExprs.empty();
4845       }))
4846     return std::make_pair(nullptr, Address::invalid());
4847   // Process list of dependencies.
4848   ASTContext &C = CGM.getContext();
4849   Address DependenciesArray = Address::invalid();
4850   llvm::Value *NumOfElements = nullptr;
4851   unsigned NumDependencies = std::accumulate(
4852       Dependencies.begin(), Dependencies.end(), 0,
4853       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4854         return D.DepKind == OMPC_DEPEND_depobj
4855                    ? V
4856                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4857       });
4858   QualType FlagsTy;
4859   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4860   bool HasDepobjDeps = false;
4861   bool HasRegularWithIterators = false;
4862   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4863   llvm::Value *NumOfRegularWithIterators =
4864       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4865   // Calculate number of depobj dependecies and regular deps with the iterators.
4866   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4867     if (D.DepKind == OMPC_DEPEND_depobj) {
4868       SmallVector<llvm::Value *, 4> Sizes =
4869           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4870       for (llvm::Value *Size : Sizes) {
4871         NumOfDepobjElements =
4872             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4873       }
4874       HasDepobjDeps = true;
4875       continue;
4876     }
4877     // Include number of iterations, if any.
4878     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4879       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4880         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4881         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4882         NumOfRegularWithIterators =
4883             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4884       }
4885       HasRegularWithIterators = true;
4886       continue;
4887     }
4888   }
4889 
4890   QualType KmpDependInfoArrayTy;
4891   if (HasDepobjDeps || HasRegularWithIterators) {
4892     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4893                                            /*isSigned=*/false);
4894     if (HasDepobjDeps) {
4895       NumOfElements =
4896           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4897     }
4898     if (HasRegularWithIterators) {
4899       NumOfElements =
4900           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4901     }
4902     OpaqueValueExpr OVE(Loc,
4903                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4904                         VK_PRValue);
4905     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4906                                                   RValue::get(NumOfElements));
4907     KmpDependInfoArrayTy =
4908         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4909                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4910     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4911     // Properly emit variable-sized array.
4912     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4913                                          ImplicitParamDecl::Other);
4914     CGF.EmitVarDecl(*PD);
4915     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4916     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4917                                               /*isSigned=*/false);
4918   } else {
4919     KmpDependInfoArrayTy = C.getConstantArrayType(
4920         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4921         ArrayType::Normal, /*IndexTypeQuals=*/0);
4922     DependenciesArray =
4923         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4924     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4925     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4926                                            /*isSigned=*/false);
4927   }
4928   unsigned Pos = 0;
4929   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4930     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4931         Dependencies[I].IteratorExpr)
4932       continue;
4933     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4934                    DependenciesArray);
4935   }
4936   // Copy regular dependecies with iterators.
4937   LValue PosLVal = CGF.MakeAddrLValue(
4938       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4939   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4940   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4942         !Dependencies[I].IteratorExpr)
4943       continue;
4944     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4945                    DependenciesArray);
4946   }
4947   // Copy final depobj arrays without iterators.
4948   if (HasDepobjDeps) {
4949     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4950       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4951         continue;
4952       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4953                          DependenciesArray);
4954     }
4955   }
4956   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4957       DependenciesArray, CGF.VoidPtrTy);
4958   return std::make_pair(NumOfElements, DependenciesArray);
4959 }
4960 
/// Allocates and fills the dependency array backing a 'depobj' object.
///
/// The array is heap-allocated via __kmpc_alloc with one extra leading
/// element whose base_addr field stores the number of dependencies (needed
/// for 'depobj(x) update(in)'); the returned address points past that extra
/// element, at the first real dependency record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the element count is a runtime product of
    // the iterator trip counts; compute size in bytes dynamically.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading size-holding element.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically known count: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill records starting at index 1 (index 0 holds the count); with an
  // iterator modifier the position must be a runtime counter.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the size element, at the first dependency record.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5043 
emitDestroyClause(CodeGenFunction & CGF,LValue DepobjLVal,SourceLocation Loc)5044 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5045                                         SourceLocation Loc) {
5046   ASTContext &C = CGM.getContext();
5047   QualType FlagsTy;
5048   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5049   LValue Base = CGF.EmitLoadOfPointerLValue(
5050       DepobjLVal.getAddress(CGF),
5051       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5052   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5053   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5054       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5055   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5056       Addr.getElementType(), Addr.getPointer(),
5057       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5058   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5059                                                                CGF.VoidPtrTy);
5060   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5061   // Use default allocator.
5062   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5063   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5064 
5065   // _kmpc_free(gtid, addr, nullptr);
5066   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5067                                 CGM.getModule(), OMPRTL___kmpc_free),
5068                             Args);
5069 }
5070 
// Implements 'depobj(x) update(kind)': walks every kmp_depend_info record
// stored in the depobj array and rewrites its 'flags' field to the new
// dependency kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the element count and the address of the first dependency record.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): there is no up-front empty check — the entry block falls
  // straight into the body, so this assumes a depobj holds at least one
  // record; TODO confirm against the depobj creation path.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer: Begin on entry, the advanced
  // pointer on the back edge (added below after the body is emitted).
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Exit once the advanced pointer reaches the past-the-end pointer.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5117 
// Emits the runtime calls for a task-generating construct: allocates and
// initializes the kmp_task_t via emitTaskInit, then either enqueues it
// (__kmpc_omp_task[_with_deps]) or — when the 'if' clause is false — waits on
// its dependences and runs the task entry inline between
// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: the task is deferred — hand it to the runtime. The arg
  // arrays are captured by reference; the lambda is only invoked before this
  // frame unwinds (via emitIfClause/RegionCodeGenTy below).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start executing at part_id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch: if(false) — the task is undeferred; run its body inline on
  // the encountering thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // CommonActionTy brackets the inline task body with the begin/complete
    // runtime calls.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the deferred path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5235 
emitTaskLoopCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPLoopDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)5236 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5237                                        const OMPLoopDirective &D,
5238                                        llvm::Function *TaskFunction,
5239                                        QualType SharedsTy, Address Shareds,
5240                                        const Expr *IfCond,
5241                                        const OMPTaskDataTy &Data) {
5242   if (!CGF.HaveInsertPoint())
5243     return;
5244   TaskResultTy Result =
5245       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5246   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5247   // libcall.
5248   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5249   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5250   // sched, kmp_uint64 grainsize, void *task_dup);
5251   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5252   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5253   llvm::Value *IfVal;
5254   if (IfCond) {
5255     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5256                                       /*isSigned=*/true);
5257   } else {
5258     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5259   }
5260 
5261   LValue LBLVal = CGF.EmitLValueForField(
5262       Result.TDBase,
5263       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5264   const auto *LBVar =
5265       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5266   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5267                        LBLVal.getQuals(),
5268                        /*IsInitializer=*/true);
5269   LValue UBLVal = CGF.EmitLValueForField(
5270       Result.TDBase,
5271       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5272   const auto *UBVar =
5273       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5274   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5275                        UBLVal.getQuals(),
5276                        /*IsInitializer=*/true);
5277   LValue StLVal = CGF.EmitLValueForField(
5278       Result.TDBase,
5279       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5280   const auto *StVar =
5281       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5282   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5283                        StLVal.getQuals(),
5284                        /*IsInitializer=*/true);
5285   // Store reductions address.
5286   LValue RedLVal = CGF.EmitLValueForField(
5287       Result.TDBase,
5288       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5289   if (Data.Reductions) {
5290     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5291   } else {
5292     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5293                                CGF.getContext().VoidPtrTy);
5294   }
5295   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5296   llvm::Value *TaskArgs[] = {
5297       UpLoc,
5298       ThreadID,
5299       Result.NewTask,
5300       IfVal,
5301       LBLVal.getPointer(CGF),
5302       UBLVal.getPointer(CGF),
5303       CGF.EmitLoadOfScalar(StLVal, Loc),
5304       llvm::ConstantInt::getSigned(
5305           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5306       llvm::ConstantInt::getSigned(
5307           CGF.IntTy, Data.Schedule.getPointer()
5308                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5309                          : NoSchedule),
5310       Data.Schedule.getPointer()
5311           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5312                                       /*isSigned=*/false)
5313           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5314       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5315                              Result.TaskDupFn, CGF.VoidPtrTy)
5316                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5317   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5318                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5319                       TaskArgs);
5320 }
5321 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by atomic-update style combiners; null otherwise).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element; back-edge incoming
  // values are added after the body is emitted (see below).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen's
  // expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5404 
5405 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5406 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5407 /// UDR combiner function.
emitReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp)5408 static void emitReductionCombiner(CodeGenFunction &CGF,
5409                                   const Expr *ReductionOp) {
5410   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5411     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5412       if (const auto *DRE =
5413               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5414         if (const auto *DRD =
5415                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5416           std::pair<llvm::Function *, llvm::Function *> Reduction =
5417               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5418           RValue Func = RValue::get(Reduction.first);
5419           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5420           CGF.EmitIgnoredExpr(ReductionOp);
5421           return;
5422         }
5423   CGF.EmitIgnoredExpr(ReductionOp);
5424 }
5425 
// Builds the 'reduce_func' passed to __kmpc_reduce{_nowait}:
//   void reduction_func(void *LHSArg, void *RHSArg);
// where both arguments are void*[n] arrays of pointers to the reduction
// variables, and each element is combined with its matching ReductionOps
// expression.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot of the argument
  // arrays so the reduction expressions below read/write through them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA privates occupy an extra slot in the argument array that carries
      // the dynamic size; bind it to the VLA's size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit one combiner per reduction, element-wise for array sections.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5517 
emitSingleReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp,const Expr * PrivateRef,const DeclRefExpr * LHS,const DeclRefExpr * RHS)5518 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5519                                                   const Expr *ReductionOp,
5520                                                   const Expr *PrivateRef,
5521                                                   const DeclRefExpr *LHS,
5522                                                   const DeclRefExpr *RHS) {
5523   if (PrivateRef->getType()->isArrayType()) {
5524     // Emit reduction for array section.
5525     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5526     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5527     EmitOMPAggregateReduction(
5528         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5529         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5530           emitReductionCombiner(CGF, ReductionOp);
5531         });
5532   } else {
5533     // Emit reduction for array subscript or single variable.
5534     emitReductionCombiner(CGF, ReductionOp);
5535   }
5536 }
5537 
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,ReductionOptionsTy Options)5538 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5539                                     ArrayRef<const Expr *> Privates,
5540                                     ArrayRef<const Expr *> LHSExprs,
5541                                     ArrayRef<const Expr *> RHSExprs,
5542                                     ArrayRef<const Expr *> ReductionOps,
5543                                     ReductionOptionsTy Options) {
5544   if (!CGF.HaveInsertPoint())
5545     return;
5546 
5547   bool WithNowait = Options.WithNowait;
5548   bool SimpleReduction = Options.SimpleReduction;
5549 
5550   // Next code should be emitted for reduction:
5551   //
5552   // static kmp_critical_name lock = { 0 };
5553   //
5554   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5555   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5556   //  ...
5557   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5558   //  *(Type<n>-1*)rhs[<n>-1]);
5559   // }
5560   //
5561   // ...
5562   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5563   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5564   // RedList, reduce_func, &<lock>)) {
5565   // case 1:
5566   //  ...
5567   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5568   //  ...
5569   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5570   // break;
5571   // case 2:
5572   //  ...
5573   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5574   //  ...
5575   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5576   // break;
5577   // default:;
5578   // }
5579   //
5580   // if SimpleReduction is true, only the next code is generated:
5581   //  ...
5582   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5583   //  ...
5584 
5585   ASTContext &C = CGM.getContext();
5586 
5587   if (SimpleReduction) {
5588     CodeGenFunction::RunCleanupsScope Scope(CGF);
5589     auto IPriv = Privates.begin();
5590     auto ILHS = LHSExprs.begin();
5591     auto IRHS = RHSExprs.begin();
5592     for (const Expr *E : ReductionOps) {
5593       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5594                                   cast<DeclRefExpr>(*IRHS));
5595       ++IPriv;
5596       ++ILHS;
5597       ++IRHS;
5598     }
5599     return;
5600   }
5601 
5602   // 1. Build a list of reduction variables.
5603   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5604   auto Size = RHSExprs.size();
5605   for (const Expr *E : Privates) {
5606     if (E->getType()->isVariablyModifiedType())
5607       // Reserve place for array size.
5608       ++Size;
5609   }
5610   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5611   QualType ReductionArrayTy =
5612       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5613                              /*IndexTypeQuals=*/0);
5614   Address ReductionList =
5615       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5616   auto IPriv = Privates.begin();
5617   unsigned Idx = 0;
5618   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5619     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5620     CGF.Builder.CreateStore(
5621         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5622             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5623         Elem);
5624     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5625       // Store array size.
5626       ++Idx;
5627       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5628       llvm::Value *Size = CGF.Builder.CreateIntCast(
5629           CGF.getVLASize(
5630                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5631               .NumElts,
5632           CGF.SizeTy, /*isSigned=*/false);
5633       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5634                               Elem);
5635     }
5636   }
5637 
5638   // 2. Emit reduce_func().
5639   llvm::Function *ReductionFn = emitReductionFunction(
5640       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5641       LHSExprs, RHSExprs, ReductionOps);
5642 
5643   // 3. Create static kmp_critical_name lock = { 0 };
5644   std::string Name = getName({"reduction"});
5645   llvm::Value *Lock = getCriticalRegionLock(Name);
5646 
5647   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5648   // RedList, reduce_func, &<lock>);
5649   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5650   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5651   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5652   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5653       ReductionList.getPointer(), CGF.VoidPtrTy);
5654   llvm::Value *Args[] = {
5655       IdentTLoc,                             // ident_t *<loc>
5656       ThreadId,                              // i32 <gtid>
5657       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5658       ReductionArrayTySize,                  // size_type sizeof(RedList)
5659       RL,                                    // void *RedList
5660       ReductionFn, // void (*) (void *, void *) <reduce_func>
5661       Lock         // kmp_critical_name *&<lock>
5662   };
5663   llvm::Value *Res = CGF.EmitRuntimeCall(
5664       OMPBuilder.getOrCreateRuntimeFunction(
5665           CGM.getModule(),
5666           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5667       Args);
5668 
5669   // 5. Build switch(res)
5670   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5671   llvm::SwitchInst *SwInst =
5672       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5673 
5674   // 6. Build case 1:
5675   //  ...
5676   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5677   //  ...
5678   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5679   // break;
5680   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5681   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5682   CGF.EmitBlock(Case1BB);
5683 
5684   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5685   llvm::Value *EndArgs[] = {
5686       IdentTLoc, // ident_t *<loc>
5687       ThreadId,  // i32 <gtid>
5688       Lock       // kmp_critical_name *&<lock>
5689   };
5690   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5691                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5692     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5693     auto IPriv = Privates.begin();
5694     auto ILHS = LHSExprs.begin();
5695     auto IRHS = RHSExprs.begin();
5696     for (const Expr *E : ReductionOps) {
5697       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5698                                      cast<DeclRefExpr>(*IRHS));
5699       ++IPriv;
5700       ++ILHS;
5701       ++IRHS;
5702     }
5703   };
5704   RegionCodeGenTy RCG(CodeGen);
5705   CommonActionTy Action(
5706       nullptr, llvm::None,
5707       OMPBuilder.getOrCreateRuntimeFunction(
5708           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5709                                       : OMPRTL___kmpc_end_reduce),
5710       EndArgs);
5711   RCG.setAction(Action);
5712   RCG(CGF);
5713 
5714   CGF.EmitBranch(DefaultBB);
5715 
5716   // 7. Build case 2:
5717   //  ...
5718   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5719   //  ...
5720   // break;
5721   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5722   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5723   CGF.EmitBlock(Case2BB);
5724 
5725   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5726                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5727     auto ILHS = LHSExprs.begin();
5728     auto IRHS = RHSExprs.begin();
5729     auto IPriv = Privates.begin();
5730     for (const Expr *E : ReductionOps) {
5731       const Expr *XExpr = nullptr;
5732       const Expr *EExpr = nullptr;
5733       const Expr *UpExpr = nullptr;
5734       BinaryOperatorKind BO = BO_Comma;
5735       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5736         if (BO->getOpcode() == BO_Assign) {
5737           XExpr = BO->getLHS();
5738           UpExpr = BO->getRHS();
5739         }
5740       }
5741       // Try to emit update expression as a simple atomic.
5742       const Expr *RHSExpr = UpExpr;
5743       if (RHSExpr) {
5744         // Analyze RHS part of the whole expression.
5745         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5746                 RHSExpr->IgnoreParenImpCasts())) {
5747           // If this is a conditional operator, analyze its condition for
5748           // min/max reduction operator.
5749           RHSExpr = ACO->getCond();
5750         }
5751         if (const auto *BORHS =
5752                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5753           EExpr = BORHS->getRHS();
5754           BO = BORHS->getOpcode();
5755         }
5756       }
5757       if (XExpr) {
5758         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5759         auto &&AtomicRedGen = [BO, VD,
5760                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5761                                     const Expr *EExpr, const Expr *UpExpr) {
5762           LValue X = CGF.EmitLValue(XExpr);
5763           RValue E;
5764           if (EExpr)
5765             E = CGF.EmitAnyExpr(EExpr);
5766           CGF.EmitOMPAtomicSimpleUpdateExpr(
5767               X, E, BO, /*IsXLHSInRHSPart=*/true,
5768               llvm::AtomicOrdering::Monotonic, Loc,
5769               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5770                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5771                 PrivateScope.addPrivate(
5772                     VD, [&CGF, VD, XRValue, Loc]() {
5773                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5774                       CGF.emitOMPSimpleStore(
5775                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5776                           VD->getType().getNonReferenceType(), Loc);
5777                       return LHSTemp;
5778                     });
5779                 (void)PrivateScope.Privatize();
5780                 return CGF.EmitAnyExpr(UpExpr);
5781               });
5782         };
5783         if ((*IPriv)->getType()->isArrayType()) {
5784           // Emit atomic reduction for array section.
5785           const auto *RHSVar =
5786               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5787           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5788                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5789         } else {
5790           // Emit atomic reduction for array subscript or single variable.
5791           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5792         }
5793       } else {
5794         // Emit as a critical region.
5795         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5796                                            const Expr *, const Expr *) {
5797           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5798           std::string Name = RT.getName({"atomic_reduction"});
5799           RT.emitCriticalRegion(
5800               CGF, Name,
5801               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5802                 Action.Enter(CGF);
5803                 emitReductionCombiner(CGF, E);
5804               },
5805               Loc);
5806         };
5807         if ((*IPriv)->getType()->isArrayType()) {
5808           const auto *LHSVar =
5809               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5810           const auto *RHSVar =
5811               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5812           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5813                                     CritRedGen);
5814         } else {
5815           CritRedGen(CGF, nullptr, nullptr, nullptr);
5816         }
5817       }
5818       ++ILHS;
5819       ++IRHS;
5820       ++IPriv;
5821     }
5822   };
5823   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5824   if (!WithNowait) {
5825     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5826     llvm::Value *EndArgs[] = {
5827         IdentTLoc, // ident_t *<loc>
5828         ThreadId,  // i32 <gtid>
5829         Lock       // kmp_critical_name *&<lock>
5830     };
5831     CommonActionTy Action(nullptr, llvm::None,
5832                           OMPBuilder.getOrCreateRuntimeFunction(
5833                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5834                           EndArgs);
5835     AtomicRCG.setAction(Action);
5836     AtomicRCG(CGF);
5837   } else {
5838     AtomicRCG(CGF);
5839   }
5840 
5841   CGF.EmitBranch(DefaultBB);
5842   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5843 }
5844 
5845 /// Generates unique name for artificial threadprivate variables.
5846 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
generateUniqueName(CodeGenModule & CGM,StringRef Prefix,const Expr * Ref)5847 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5848                                       const Expr *Ref) {
5849   SmallString<256> Buffer;
5850   llvm::raw_svector_ostream Out(Buffer);
5851   const clang::DeclRefExpr *DE;
5852   const VarDecl *D = ::getBaseDecl(Ref, DE);
5853   if (!D)
5854     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5855   D = D->getCanonicalDecl();
5856   std::string Name = CGM.getOpenMPRuntime().getName(
5857       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5858   Out << Prefix << Name << "_"
5859       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5860   return std::string(Out.str());
5861 }
5862 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // %arg - pointer to the private copy being initialized.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  // %orig - pointer to the original reduction item (only consulted when the
  // declare-reduction initializer references the original variable).
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the address of the private copy from %arg.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored earlier by emitTaskReductionFixups under
  // the same unique name).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Otherwise pass a null pointer - the initializer does not read %orig.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5931 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The LHS/RHS helper variables referenced by ReductionOp; they are remapped
  // to the function arguments below via the private scope.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0 - in/out reduction item; the combined value is stored through it.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  // %arg1 - input reduction item (read-only operand of the combiner).
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored earlier by emitTaskReductionFixups under
  // the same unique name).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6009 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item does not need any cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is emitted (and none is registered with the runtime) if the
  // item's type has no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // %arg - pointer to the private copy to be destroyed.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the address of the private copy from %arg.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored earlier by emitTaskReductionFixups under
  // the same unique name).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6058 
emitTaskReductionInit(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,const OMPTaskDataTy & Data)6059 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6060     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6061     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6062   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6063     return nullptr;
6064 
6065   // Build typedef struct:
6066   // kmp_taskred_input {
6067   //   void *reduce_shar; // shared reduction item
6068   //   void *reduce_orig; // original reduction item used for initialization
6069   //   size_t reduce_size; // size of data item
6070   //   void *reduce_init; // data initialization routine
6071   //   void *reduce_fini; // data finalization routine
6072   //   void *reduce_comb; // data combiner routine
6073   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6074   // } kmp_taskred_input_t;
6075   ASTContext &C = CGM.getContext();
6076   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6077   RD->startDefinition();
6078   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6079   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6080   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6081   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6082   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6083   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6084   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6085       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6086   RD->completeDefinition();
6087   QualType RDType = C.getRecordType(RD);
6088   unsigned Size = Data.ReductionVars.size();
6089   llvm::APInt ArraySize(/*numBits=*/64, Size);
6090   QualType ArrayRDType = C.getConstantArrayType(
6091       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6092   // kmp_task_red_input_t .rd_input.[Size];
6093   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6094   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6095                        Data.ReductionCopies, Data.ReductionOps);
6096   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6097     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6098     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6099                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6100     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6101         TaskRedInput.getPointer(), Idxs,
6102         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6103         ".rd_input.gep.");
6104     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6105     // ElemLVal.reduce_shar = &Shareds[Cnt];
6106     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6107     RCG.emitSharedOrigLValue(CGF, Cnt);
6108     llvm::Value *CastedShared =
6109         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6110     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6111     // ElemLVal.reduce_orig = &Origs[Cnt];
6112     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6113     llvm::Value *CastedOrig =
6114         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6115     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6116     RCG.emitAggregateType(CGF, Cnt);
6117     llvm::Value *SizeValInChars;
6118     llvm::Value *SizeVal;
6119     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6120     // We use delayed creation/initialization for VLAs and array sections. It is
6121     // required because runtime does not provide the way to pass the sizes of
6122     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6123     // threadprivate global variables are used to store these values and use
6124     // them in the functions.
6125     bool DelayedCreation = !!SizeVal;
6126     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6127                                                /*isSigned=*/false);
6128     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6129     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6130     // ElemLVal.reduce_init = init;
6131     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6132     llvm::Value *InitAddr =
6133         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6134     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6135     // ElemLVal.reduce_fini = fini;
6136     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6137     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6138     llvm::Value *FiniAddr = Fini
6139                                 ? CGF.EmitCastToVoidPtr(Fini)
6140                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6141     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6142     // ElemLVal.reduce_comb = comb;
6143     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6144     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6145         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6146         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6147     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6148     // ElemLVal.flags = 0;
6149     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6150     if (DelayedCreation) {
6151       CGF.EmitStoreOfScalar(
6152           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6153           FlagsLVal);
6154     } else
6155       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6156                                  FlagsLVal.getType());
6157   }
6158   if (Data.IsReductionWithTaskMod) {
6159     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6160     // is_ws, int num, void *data);
6161     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6162     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6163                                                   CGM.IntTy, /*isSigned=*/true);
6164     llvm::Value *Args[] = {
6165         IdentTLoc, GTid,
6166         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6167                                /*isSigned=*/true),
6168         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6169         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6170             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6171     return CGF.EmitRuntimeCall(
6172         OMPBuilder.getOrCreateRuntimeFunction(
6173             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6174         Args);
6175   }
6176   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6177   llvm::Value *Args[] = {
6178       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6179                                 /*isSigned=*/true),
6180       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6181       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6182                                                       CGM.VoidPtrTy)};
6183   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6184                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6185                              Args);
6186 }
6187 
emitTaskReductionFini(CodeGenFunction & CGF,SourceLocation Loc,bool IsWorksharingReduction)6188 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6189                                             SourceLocation Loc,
6190                                             bool IsWorksharingReduction) {
6191   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6192   // is_ws, int num, void *data);
6193   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6194   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6195                                                 CGM.IntTy, /*isSigned=*/true);
6196   llvm::Value *Args[] = {IdentTLoc, GTid,
6197                          llvm::ConstantInt::get(CGM.IntTy,
6198                                                 IsWorksharingReduction ? 1 : 0,
6199                                                 /*isSigned=*/true)};
6200   (void)CGF.EmitRuntimeCall(
6201       OMPBuilder.getOrCreateRuntimeFunction(
6202           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6203       Args);
6204 }
6205 
emitTaskReductionFixups(CodeGenFunction & CGF,SourceLocation Loc,ReductionCodeGen & RCG,unsigned N)6206 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6207                                               SourceLocation Loc,
6208                                               ReductionCodeGen &RCG,
6209                                               unsigned N) {
6210   auto Sizes = RCG.getSizes(N);
6211   // Emit threadprivate global variable if the type is non-constant
6212   // (Sizes.second = nullptr).
6213   if (Sizes.second) {
6214     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6215                                                      /*isSigned=*/false);
6216     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6217         CGF, CGM.getContext().getSizeType(),
6218         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6219     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6220   }
6221 }
6222 
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)6223 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6224                                               SourceLocation Loc,
6225                                               llvm::Value *ReductionsPtr,
6226                                               LValue SharedLVal) {
6227   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6228   // *d);
6229   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6230                                                    CGM.IntTy,
6231                                                    /*isSigned=*/true),
6232                          ReductionsPtr,
6233                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6234                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6235   return Address(
6236       CGF.EmitRuntimeCall(
6237           OMPBuilder.getOrCreateRuntimeFunction(
6238               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6239           Args),
6240       SharedLVal.getAlignment());
6241 }
6242 
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc)6243 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6244                                        SourceLocation Loc) {
6245   if (!CGF.HaveInsertPoint())
6246     return;
6247 
6248   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6249     OMPBuilder.createTaskwait(CGF.Builder);
6250   } else {
6251     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6252     // global_tid);
6253     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6254     // Ignore return result until untied tasks are supported.
6255     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6256                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6257                         Args);
6258   }
6259 
6260   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6261     Region->emitUntiedSwitch(CGF);
6262 }
6263 
emitInlinedDirective(CodeGenFunction & CGF,OpenMPDirectiveKind InnerKind,const RegionCodeGenTy & CodeGen,bool HasCancel)6264 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6265                                            OpenMPDirectiveKind InnerKind,
6266                                            const RegionCodeGenTy &CodeGen,
6267                                            bool HasCancel) {
6268   if (!CGF.HaveInsertPoint())
6269     return;
6270   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6271                                  InnerKind != OMPD_critical &&
6272                                      InnerKind != OMPD_master &&
6273                                      InnerKind != OMPD_masked);
6274   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6275 }
6276 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points; the
/// numeric values match what the runtime expects.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6286 
getCancellationKind(OpenMPDirectiveKind CancelRegion)6287 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6288   RTCancelKind CancelKind = CancelNoreq;
6289   if (CancelRegion == OMPD_parallel)
6290     CancelKind = CancelParallel;
6291   else if (CancelRegion == OMPD_for)
6292     CancelKind = CancelLoop;
6293   else if (CancelRegion == OMPD_sections)
6294     CancelKind = CancelSections;
6295   else {
6296     assert(CancelRegion == OMPD_taskgroup);
6297     CancelKind = CancelTaskgroup;
6298   }
6299   return CancelKind;
6300 }
6301 
/// Emit a call to __kmpc_cancellationpoint together with the control flow
/// that exits the construct when cancellation has been activated. Nothing is
/// emitted unless we are inside an OpenMP region; for 'taskgroup' the call is
/// emitted even when the region itself carries no cancel, since the cancel
/// may come from an adjacent task.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // For parallel cancellation a cancel barrier is emitted first (checks
      // suppressed).
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6341 
/// Emit a call to __kmpc_cancel and the control flow that exits the construct
/// when cancellation is activated. When \p IfCond is non-null the call is
/// guarded by the 'if' clause condition (false branch emits nothing).
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The 'then' generator may run inside a different CodeGenFunction (via
    // emitIfClause), so the runtime is re-fetched through that CGF.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard by the 'if' clause; the else branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6387 
6388 namespace {
6389 /// Cleanup action for uses_allocators support.
6390 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6391   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6392 
6393 public:
OMPUsesAllocatorsActionTy(ArrayRef<std::pair<const Expr *,const Expr * >> Allocators)6394   OMPUsesAllocatorsActionTy(
6395       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6396       : Allocators(Allocators) {}
Enter(CodeGenFunction & CGF)6397   void Enter(CodeGenFunction &CGF) override {
6398     if (!CGF.HaveInsertPoint())
6399       return;
6400     for (const auto &AllocatorData : Allocators) {
6401       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6402           CGF, AllocatorData.first, AllocatorData.second);
6403     }
6404   }
Exit(CodeGenFunction & CGF)6405   void Exit(CodeGenFunction &CGF) override {
6406     if (!CGF.HaveInsertPoint())
6407       return;
6408     for (const auto &AllocatorData : Allocators) {
6409       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6410                                                         AllocatorData.first);
6411     }
6412   }
6413 };
6414 } // namespace
6415 
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)6416 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6417     const OMPExecutableDirective &D, StringRef ParentName,
6418     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6419     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6420   assert(!ParentName.empty() && "Invalid target region parent name!");
6421   HasEmittedTargetRegion = true;
6422   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6423   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6424     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6425       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6426       if (!D.AllocatorTraits)
6427         continue;
6428       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6429     }
6430   }
6431   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6432   CodeGen.setAction(UsesAllocatorAction);
6433   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6434                                    IsOffloadEntry, CodeGen);
6435 }
6436 
/// Initialize an allocator declared in a uses_allocators clause: calls
/// __kmpc_init_allocator(gtid, memspace, ntraits, traits) and stores the
/// returned handle into the (locally emitted) allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // The runtime expects a signed int thread id.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = number of elements in the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** so a void* can be loaded
  // for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // First emit the allocator variable itself, then convert the returned
  // void* handle to the allocator's declared type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6471 
emitUsesAllocatorsFini(CodeGenFunction & CGF,const Expr * Allocator)6472 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6473                                              const Expr *Allocator) {
6474   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6475   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6476   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6477   llvm::Value *AllocatorVal =
6478       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6479   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6480                                           CGF.getContext().VoidPtrTy,
6481                                           Allocator->getExprLoc());
6482   (void)CGF.EmitRuntimeCall(
6483       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6484                                             OMPRTL___kmpc_destroy_allocator),
6485       {ThreadId, AllocatorVal});
6486 }
6487 
/// Emit the outlined function for a target region. For offload entries it
/// also creates the region ID (host) or rewrites linkage/calling convention
/// (device), registers the entry with the offload-entries table, and attaches
/// num_teams / thread_limit attributes when those are compile-time constants.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement into a function with the name built above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN kernels must carry the AMDGPU_KERNEL calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is an opaque one-byte global constant.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}
6570 
6571 /// Checks if the expression is constant or does not have non-trivial function
6572 /// calls.
isTrivial(ASTContext & Ctx,const Expr * E)6573 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6574   // We can skip constant expressions.
6575   // We can skip expressions with trivial calls or simple expressions.
6576   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6577           !E->hasNonTrivialCall(Ctx)) &&
6578          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6579 }
6580 
/// Return the single interesting statement nested in \p Body, looking through
/// compound statements. Trivial expressions, asm/null statements, flush /
/// barrier / taskyield directives and declarations without runtime effect are
/// ignored. Returns nullptr when more than one interesting child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals do not affect the region body.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Unwrap the found child and keep descending through nested compounds.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6622 
/// Return the num_teams expression for a target-based directive, if any, and
/// set \p DefaultVal to a statically known team count:
///   -1 - unknown; used to check whether a teams region must be emitted,
///    0 - a teams region exists but the count is not a compile-time constant,
///    1 - exactly one team,
///   >1 - the constant value of the num_teams clause.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // Look for a teams directive nested directly inside the target region.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Record the constant value of num_teams when evaluable.
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Combined target+parallel/simd constructs always use a single team.
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not target-based and cannot reach here
  // (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6750 
emitNumTeamsForTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D)6751 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6752     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6753   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6754          "Clauses associated with the teams directive expected to be emitted "
6755          "only for the host!");
6756   CGBuilderTy &Bld = CGF.Builder;
6757   int32_t DefaultNT = -1;
6758   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6759   if (NumTeams != nullptr) {
6760     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6761 
6762     switch (DirectiveKind) {
6763     case OMPD_target: {
6764       const auto *CS = D.getInnermostCapturedStmt();
6765       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6766       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6767       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6768                                                   /*IgnoreResultAssign*/ true);
6769       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6770                              /*isSigned=*/true);
6771     }
6772     case OMPD_target_teams:
6773     case OMPD_target_teams_distribute:
6774     case OMPD_target_teams_distribute_simd:
6775     case OMPD_target_teams_distribute_parallel_for:
6776     case OMPD_target_teams_distribute_parallel_for_simd: {
6777       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6778       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6779                                                   /*IgnoreResultAssign*/ true);
6780       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6781                              /*isSigned=*/true);
6782     }
6783     default:
6784       break;
6785     }
6786   } else if (DefaultNT == -1) {
6787     return nullptr;
6788   }
6789 
6790   return Bld.getInt32(DefaultNT);
6791 }
6792 
/// Compute the number of threads implied by a parallel region nested directly
/// inside \p CS (a target region body), clamped by \p DefaultThreadLimitVal
/// when that is non-null. For a nested simd region the answer is 1; when no
/// nested directive is found, returns \p DefaultThreadLimitVal (or i32 0 when
/// it is null, meaning "no limit known").
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Only 'if' clauses that apply to parallel (or have no modifier)
        // matter here.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the parallel region is serialized,
            // one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Dynamic condition: emit any pre-init declarations, then the
            // condition itself for a runtime select below.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the thread limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region runs on a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6884 
getNumThreadsExprForTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & D,int32_t & DefaultVal)6885 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6886     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6887     int32_t &DefaultVal) {
6888   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6889   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6890          "Expected target-based executable directive.");
6891 
6892   switch (DirectiveKind) {
6893   case OMPD_target:
6894     // Teams have no clause thread_limit
6895     return nullptr;
6896   case OMPD_target_teams:
6897   case OMPD_target_teams_distribute:
6898     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6899       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6900       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6901       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6902         if (auto Constant =
6903                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6904           DefaultVal = Constant->getExtValue();
6905       return ThreadLimit;
6906     }
6907     return nullptr;
6908   case OMPD_target_parallel:
6909   case OMPD_target_parallel_for:
6910   case OMPD_target_parallel_for_simd:
6911   case OMPD_target_teams_distribute_parallel_for:
6912   case OMPD_target_teams_distribute_parallel_for_simd: {
6913     Expr *ThreadLimit = nullptr;
6914     Expr *NumThreads = nullptr;
6915     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6916       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6917       ThreadLimit = ThreadLimitClause->getThreadLimit();
6918       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6919         if (auto Constant =
6920                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6921           DefaultVal = Constant->getExtValue();
6922     }
6923     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6924       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6925       NumThreads = NumThreadsClause->getNumThreads();
6926       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6927         if (auto Constant =
6928                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6929           if (Constant->getExtValue() < DefaultVal) {
6930             DefaultVal = Constant->getExtValue();
6931             ThreadLimit = NumThreads;
6932           }
6933         }
6934       }
6935     }
6936     return ThreadLimit;
6937   }
6938   case OMPD_target_teams_distribute_simd:
6939   case OMPD_target_simd:
6940     DefaultVal = 1;
6941     return nullptr;
6942   case OMPD_parallel:
6943   case OMPD_for:
6944   case OMPD_parallel_for:
6945   case OMPD_parallel_master:
6946   case OMPD_parallel_sections:
6947   case OMPD_for_simd:
6948   case OMPD_parallel_for_simd:
6949   case OMPD_cancel:
6950   case OMPD_cancellation_point:
6951   case OMPD_ordered:
6952   case OMPD_threadprivate:
6953   case OMPD_allocate:
6954   case OMPD_task:
6955   case OMPD_simd:
6956   case OMPD_tile:
6957   case OMPD_unroll:
6958   case OMPD_sections:
6959   case OMPD_section:
6960   case OMPD_single:
6961   case OMPD_master:
6962   case OMPD_critical:
6963   case OMPD_taskyield:
6964   case OMPD_barrier:
6965   case OMPD_taskwait:
6966   case OMPD_taskgroup:
6967   case OMPD_atomic:
6968   case OMPD_flush:
6969   case OMPD_depobj:
6970   case OMPD_scan:
6971   case OMPD_teams:
6972   case OMPD_target_data:
6973   case OMPD_target_exit_data:
6974   case OMPD_target_enter_data:
6975   case OMPD_distribute:
6976   case OMPD_distribute_simd:
6977   case OMPD_distribute_parallel_for:
6978   case OMPD_distribute_parallel_for_simd:
6979   case OMPD_teams_distribute:
6980   case OMPD_teams_distribute_simd:
6981   case OMPD_teams_distribute_parallel_for:
6982   case OMPD_teams_distribute_parallel_for_simd:
6983   case OMPD_target_update:
6984   case OMPD_declare_simd:
6985   case OMPD_declare_variant:
6986   case OMPD_begin_declare_variant:
6987   case OMPD_end_declare_variant:
6988   case OMPD_declare_target:
6989   case OMPD_end_declare_target:
6990   case OMPD_declare_reduction:
6991   case OMPD_declare_mapper:
6992   case OMPD_taskloop:
6993   case OMPD_taskloop_simd:
6994   case OMPD_master_taskloop:
6995   case OMPD_master_taskloop_simd:
6996   case OMPD_parallel_master_taskloop:
6997   case OMPD_parallel_master_taskloop_simd:
6998   case OMPD_requires:
6999   case OMPD_unknown:
7000     break;
7001   default:
7002     break;
7003   }
7004   llvm_unreachable("Unsupported directive kind.");
7005 }
7006 
/// Emit an i32 value holding the number of threads to use for a target
/// region.  The value is derived from the thread_limit / num_threads / if
/// clauses on \p D (and, for a plain 'target', from a single nested
/// directive); the value 0 means "let the runtime pick the default".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // A bare 'target' has no clauses of its own; inspect a single nested
    // directive (if there is one) for thread_limit / num_threads info.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any helper variables the clause expression was captured with
        // before evaluating the expression itself.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Look through a nested teams (non-distribute) directive to find the
      // region that actually determines the number of threads.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd regions execute with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Only an if clause without a name modifier, or one naming 'parallel',
      // applies to the parallel region here.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: serial execution, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7224 
7225 namespace {
7226 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7227 
7228 // Utility to handle information from clauses associated with a given
7229 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7230 // It provides a convenient interface to obtain the information and generate
7231 // code for that information.
7232 class MappableExprsHandler {
7233 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values appear to form part of the host/device ABI
  /// shared with the offloading runtime — confirm against the runtime's map
  /// type enum before changing any value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7281 
7282   /// Get the offset of the OMP_MAP_MEMBER_OF field.
getFlagMemberOffset()7283   static unsigned getFlagMemberOffset() {
7284     unsigned Offset = 0;
7285     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7286          Remain = Remain >> 1)
7287       Offset++;
7288     return Offset;
7289   }
7290 
7291   /// Class that holds debugging information for a data mapping to be passed to
7292   /// the runtime library.
7293   class MappingExprInfo {
7294     /// The variable declaration used for the data mapping.
7295     const ValueDecl *MapDecl = nullptr;
7296     /// The original expression used in the map clause, or null if there is
7297     /// none.
7298     const Expr *MapExpr = nullptr;
7299 
7300   public:
MappingExprInfo(const ValueDecl * MapDecl,const Expr * MapExpr=nullptr)7301     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7302         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7303 
getMapDecl() const7304     const ValueDecl *getMapDecl() const { return MapDecl; }
getMapExpr() const7305     const Expr *getMapExpr() const { return MapExpr; }
7306   };
7307 
7308   /// Class that associates information with a base pointer to be passed to the
7309   /// runtime library.
7310   class BasePointerInfo {
7311     /// The base pointer.
7312     llvm::Value *Ptr = nullptr;
7313     /// The base declaration that refers to this device pointer, or null if
7314     /// there is none.
7315     const ValueDecl *DevPtrDecl = nullptr;
7316 
7317   public:
BasePointerInfo(llvm::Value * Ptr,const ValueDecl * DevPtrDecl=nullptr)7318     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7319         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
operator *() const7320     llvm::Value *operator*() const { return Ptr; }
getDevicePtrDecl() const7321     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
setDevicePtrDecl(const ValueDecl * D)7322     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7323   };
7324 
  // Convenience aliases for the parallel arrays built while generating map
  // information (see MapCombinedInfoTy below).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7332 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  /// The member arrays are parallel: entry i of each array describes the same
  /// map entry, so they must always be appended to together.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo, keeping all parallel arrays in sync.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                 CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                    CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                   CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                    CurInfo.NonContigInfo.Strides.end());
    }
  };
7371 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map info generated before the struct fields were processed.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct.
    Address Base = Address::invalid();
    /// Lower bound address of the mapped range.
    Address LB = Address::invalid();
    /// True if the lowest element is an array section.
    bool IsArraySection = false;
    /// True when the whole record (not just a field range) is mapped.
    bool HasCompleteRecord = false;
  };
7387 
7388 private:
  /// Bundle of everything known about a single mappable-expression component
  /// list: the components themselves, the map type and modifiers, and flags
  /// describing how the entry must be handled (e.g. whether the runtime has
  /// to return a device pointer for it).
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // True if the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True if the mapping was generated implicitly rather than written out.
    bool IsImplicit = false;
    // User-defined mapper associated with this entry, if any.
    const ValueDecl *Mapper = nullptr;
    // Original variable reference expression, if any.
    const Expr *VarRef = nullptr;
    // True when the entry comes from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7415 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression naming the struct member.
    const Expr *IE = nullptr;
    /// Declaration the clause applies to.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7428 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7449 
  /// Compute the size in bytes of the storage denoted by \p E for mapping
  /// purposes.  Array shaping expressions and array sections are measured by
  /// their (possibly runtime) element counts rather than by the expression's
  /// static type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        // Size = length * element size.
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp negative results to zero: select(size > lb, size - lb, 0).
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7524 
7525   /// Return the corresponding bits for a given map clause modifier. Add
7526   /// a flag marking the map as a pointer if requested. Add a flag marking the
7527   /// map as the first one of a series of maps that relate to the same map
7528   /// expression.
getMapTypeBits(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,ArrayRef<OpenMPMotionModifierKind> MotionModifiers,bool IsImplicit,bool AddPtrFlag,bool AddIsTargetParamFlag,bool IsNonContiguous) const7529   OpenMPOffloadMappingFlags getMapTypeBits(
7530       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7531       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7532       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7533     OpenMPOffloadMappingFlags Bits =
7534         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7535     switch (MapType) {
7536     case OMPC_MAP_alloc:
7537     case OMPC_MAP_release:
7538       // alloc and release is the default behavior in the runtime library,  i.e.
7539       // if we don't pass any bits alloc/release that is what the runtime is
7540       // going to do. Therefore, we don't need to signal anything for these two
7541       // type modifiers.
7542       break;
7543     case OMPC_MAP_to:
7544       Bits |= OMP_MAP_TO;
7545       break;
7546     case OMPC_MAP_from:
7547       Bits |= OMP_MAP_FROM;
7548       break;
7549     case OMPC_MAP_tofrom:
7550       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7551       break;
7552     case OMPC_MAP_delete:
7553       Bits |= OMP_MAP_DELETE;
7554       break;
7555     case OMPC_MAP_unknown:
7556       llvm_unreachable("Unexpected map type!");
7557     }
7558     if (AddPtrFlag)
7559       Bits |= OMP_MAP_PTR_AND_OBJ;
7560     if (AddIsTargetParamFlag)
7561       Bits |= OMP_MAP_TARGET_PARAM;
7562     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7563         != MapModifiers.end())
7564       Bits |= OMP_MAP_ALWAYS;
7565     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7566         != MapModifiers.end())
7567       Bits |= OMP_MAP_CLOSE;
7568     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7569             MapModifiers.end() ||
7570         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7571             MotionModifiers.end())
7572       Bits |= OMP_MAP_PRESENT;
7573     if (IsNonContiguous)
7574       Bits |= OMP_MAP_NON_CONTIG;
7575     return Bits;
7576   }
7577 
  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more that size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
7616 
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
generateInfoForComponentList(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,ArrayRef<OpenMPMotionModifierKind> MotionModifiers,OMPClauseMappableExprCommon::MappableExprComponentListRef Components,MapCombinedInfoTy & CombinedInfo,StructRangeInfoTy & PartialStruct,bool IsFirstComponentList,bool IsImplicit,const ValueDecl * Mapper=nullptr,bool ForDeviceAddr=false,const ValueDecl * BaseDecl=nullptr,const Expr * MapExpr=nullptr,ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedElements=llvm::None) const7622   void generateInfoForComponentList(
7623       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7624       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7625       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7626       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7627       bool IsFirstComponentList, bool IsImplicit,
7628       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7629       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7630       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7631           OverlappedElements = llvm::None) const {
7632     // The following summarizes what has to be generated for each map and the
7633     // types below. The generated information is expressed in this order:
7634     // base pointer, section pointer, size, flags
7635     // (to add to the ones that come from the map type and modifier).
7636     //
7637     // double d;
7638     // int i[100];
7639     // float *p;
7640     //
7641     // struct S1 {
7642     //   int i;
7643     //   float f[50];
7644     // }
7645     // struct S2 {
7646     //   int i;
7647     //   float f[50];
7648     //   S1 s;
7649     //   double *p;
7650     //   struct S2 *ps;
7651     //   int &ref;
7652     // }
7653     // S2 s;
7654     // S2 *ps;
7655     //
7656     // map(d)
7657     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7658     //
7659     // map(i)
7660     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7661     //
7662     // map(i[1:23])
7663     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7664     //
7665     // map(p)
7666     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7667     //
7668     // map(p[1:24])
7669     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7670     // in unified shared memory mode or for local pointers
7671     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7672     //
7673     // map(s)
7674     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7675     //
7676     // map(s.i)
7677     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7678     //
7679     // map(s.s.f)
7680     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7681     //
7682     // map(s.p)
7683     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7684     //
7685     // map(to: s.p[:22])
7686     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7687     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7688     // &(s.p), &(s.p[0]), 22*sizeof(double),
7689     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7690     // (*) alloc space for struct members, only this is a target parameter
7691     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7692     //      optimizes this entry out, same in the examples below)
7693     // (***) map the pointee (map: to)
7694     //
7695     // map(to: s.ref)
7696     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7697     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7698     // (*) alloc space for struct members, only this is a target parameter
7699     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7700     //      optimizes this entry out, same in the examples below)
7701     // (***) map the pointee (map: to)
7702     //
7703     // map(s.ps)
7704     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7705     //
7706     // map(from: s.ps->s.i)
7707     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7708     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7709     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7710     //
7711     // map(to: s.ps->ps)
7712     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7713     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7714     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7715     //
7716     // map(s.ps->ps->ps)
7717     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7718     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7719     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7720     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7721     //
7722     // map(to: s.ps->ps->s.f[:22])
7723     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7724     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7725     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7726     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7727     //
7728     // map(ps)
7729     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7730     //
7731     // map(ps->i)
7732     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7733     //
7734     // map(ps->s.f)
7735     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7736     //
7737     // map(from: ps->p)
7738     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7739     //
7740     // map(to: ps->p[:22])
7741     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7742     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7743     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7744     //
7745     // map(ps->ps)
7746     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7747     //
7748     // map(from: ps->ps->s.i)
7749     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7750     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7751     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7752     //
7753     // map(from: ps->ps->ps)
7754     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7755     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7756     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7757     //
7758     // map(ps->ps->ps->ps)
7759     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7760     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7761     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7762     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7763     //
7764     // map(to: ps->ps->ps->s.f[:22])
7765     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7766     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7767     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7768     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7769     //
7770     // map(to: s.f[:22]) map(from: s.p[:33])
7771     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7772     //     sizeof(double*) (**), TARGET_PARAM
7773     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7774     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7775     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7776     // (*) allocate contiguous space needed to fit all mapped members even if
7777     //     we allocate space for members not mapped (in this example,
7778     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7779     //     them as well because they fall between &s.f[0] and &s.p)
7780     //
7781     // map(from: s.f[:22]) map(to: ps->p[:33])
7782     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7783     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7784     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7785     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7786     // (*) the struct this entry pertains to is the 2nd element in the list of
7787     //     arguments, hence MEMBER_OF(2)
7788     //
7789     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7790     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7791     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7792     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7793     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7794     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7795     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7796     // (*) the struct this entry pertains to is the 4th element in the list
7797     //     of arguments, hence MEMBER_OF(4)
7798 
7799     // Track if the map information being generated is the first for a capture.
7800     bool IsCaptureFirstInfo = IsFirstComponentList;
7801     // When the variable is on a declare target link or in a to clause with
7802     // unified memory, a reference is needed to hold the host/device address
7803     // of the variable.
7804     bool RequiresReference = false;
7805 
7806     // Scan the components from the base to the complete expression.
7807     auto CI = Components.rbegin();
7808     auto CE = Components.rend();
7809     auto I = CI;
7810 
7811     // Track if the map information being generated is the first for a list of
7812     // components.
7813     bool IsExpressionFirstInfo = true;
7814     bool FirstPointerInComplexData = false;
7815     Address BP = Address::invalid();
7816     const Expr *AssocExpr = I->getAssociatedExpression();
7817     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7818     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7819     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7820 
7821     if (isa<MemberExpr>(AssocExpr)) {
7822       // The base is the 'this' pointer. The content of the pointer is going
7823       // to be the base of the field being mapped.
7824       BP = CGF.LoadCXXThisAddress();
7825     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7826                (OASE &&
7827                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7828       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7829     } else if (OAShE &&
7830                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7831       BP = Address(
7832           CGF.EmitScalarExpr(OAShE->getBase()),
7833           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7834     } else {
7835       // The base is the reference to the variable.
7836       // BP = &Var.
7837       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7838       if (const auto *VD =
7839               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7840         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7841                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7842           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7843               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7844                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7845             RequiresReference = true;
7846             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7847           }
7848         }
7849       }
7850 
7851       // If the variable is a pointer and is being dereferenced (i.e. is not
7852       // the last component), the base has to be the pointer itself, not its
7853       // reference. References are ignored for mapping purposes.
7854       QualType Ty =
7855           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7856       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7857         // No need to generate individual map information for the pointer, it
7858         // can be associated with the combined storage if shared memory mode is
7859         // active or the base declaration is not global variable.
7860         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7861         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7862             !VD || VD->hasLocalStorage())
7863           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7864         else
7865           FirstPointerInComplexData = true;
7866         ++I;
7867       }
7868     }
7869 
7870     // Track whether a component of the list should be marked as MEMBER_OF some
7871     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7872     // in a component list should be marked as MEMBER_OF, all subsequent entries
7873     // do not belong to the base struct. E.g.
7874     // struct S2 s;
7875     // s.ps->ps->ps->f[:]
7876     //   (1) (2) (3) (4)
7877     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7878     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7879     // is the pointee of ps(2) which is not member of struct s, so it should not
7880     // be marked as such (it is still PTR_AND_OBJ).
7881     // The variable is initialized to false so that PTR_AND_OBJ entries which
7882     // are not struct members are not considered (e.g. array of pointers to
7883     // data).
7884     bool ShouldBeMemberOf = false;
7885 
7886     // Variable keeping track of whether or not we have encountered a component
7887     // in the component list which is a member expression. Useful when we have a
7888     // pointer or a final array section, in which case it is the previous
7889     // component in the list which tells us whether we have a member expression.
7890     // E.g. X.f[:]
7891     // While processing the final array section "[:]" it is "f" which tells us
7892     // whether we are dealing with a member of a declared struct.
7893     const MemberExpr *EncounteredME = nullptr;
7894 
7895     // Track for the total number of dimension. Start from one for the dummy
7896     // dimension.
7897     uint64_t DimSize = 1;
7898 
7899     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7900     bool IsPrevMemberReference = false;
7901 
7902     for (; I != CE; ++I) {
7903       // If the current component is member of a struct (parent struct) mark it.
7904       if (!EncounteredME) {
7905         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7906         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7907         // as MEMBER_OF the parent struct.
7908         if (EncounteredME) {
7909           ShouldBeMemberOf = true;
7910           // Do not emit as complex pointer if this is actually not array-like
7911           // expression.
7912           if (FirstPointerInComplexData) {
7913             QualType Ty = std::prev(I)
7914                               ->getAssociatedDeclaration()
7915                               ->getType()
7916                               .getNonReferenceType();
7917             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7918             FirstPointerInComplexData = false;
7919           }
7920         }
7921       }
7922 
7923       auto Next = std::next(I);
7924 
7925       // We need to generate the addresses and sizes if this is the last
7926       // component, if the component is a pointer or if it is an array section
7927       // whose length can't be proved to be one. If this is a pointer, it
7928       // becomes the base address for the following components.
7929 
7930       // A final array section, is one whose length can't be proved to be one.
7931       // If the map item is non-contiguous then we don't treat any array section
7932       // as final array section.
7933       bool IsFinalArraySection =
7934           !IsNonContiguous &&
7935           isFinalArraySectionExpression(I->getAssociatedExpression());
7936 
7937       // If we have a declaration for the mapping use that, otherwise use
7938       // the base declaration of the map clause.
7939       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7940                                      ? I->getAssociatedDeclaration()
7941                                      : BaseDecl;
7942       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7943                                                : MapExpr;
7944 
7945       // Get information on whether the element is a pointer. Have to do a
7946       // special treatment for array sections given that they are built-in
7947       // types.
7948       const auto *OASE =
7949           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7950       const auto *OAShE =
7951           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7952       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7953       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7954       bool IsPointer =
7955           OAShE ||
7956           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7957                        .getCanonicalType()
7958                        ->isAnyPointerType()) ||
7959           I->getAssociatedExpression()->getType()->isAnyPointerType();
7960       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7961                                MapDecl &&
7962                                MapDecl->getType()->isLValueReferenceType();
7963       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7964 
7965       if (OASE)
7966         ++DimSize;
7967 
7968       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7969           IsFinalArraySection) {
7970         // If this is not the last component, we expect the pointer to be
7971         // associated with an array expression or member expression.
7972         assert((Next == CE ||
7973                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7974                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7975                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7976                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7977                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7978                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7979                "Unexpected expression");
7980 
7981         Address LB = Address::invalid();
7982         Address LowestElem = Address::invalid();
7983         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7984                                        const MemberExpr *E) {
7985           const Expr *BaseExpr = E->getBase();
7986           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7987           // scalar.
7988           LValue BaseLV;
7989           if (E->isArrow()) {
7990             LValueBaseInfo BaseInfo;
7991             TBAAAccessInfo TBAAInfo;
7992             Address Addr =
7993                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7994             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7995             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7996           } else {
7997             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7998           }
7999           return BaseLV;
8000         };
8001         if (OAShE) {
8002           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8003                                     CGF.getContext().getTypeAlignInChars(
8004                                         OAShE->getBase()->getType()));
8005         } else if (IsMemberReference) {
8006           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8007           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8008           LowestElem = CGF.EmitLValueForFieldInitialization(
8009                               BaseLVal, cast<FieldDecl>(MapDecl))
8010                            .getAddress(CGF);
8011           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8012                    .getAddress(CGF);
8013         } else {
8014           LowestElem = LB =
8015               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8016                   .getAddress(CGF);
8017         }
8018 
8019         // If this component is a pointer inside the base struct then we don't
8020         // need to create any entry for it - it will be combined with the object
8021         // it is pointing to into a single PTR_AND_OBJ entry.
8022         bool IsMemberPointerOrAddr =
8023             EncounteredME &&
8024             (((IsPointer || ForDeviceAddr) &&
8025               I->getAssociatedExpression() == EncounteredME) ||
8026              (IsPrevMemberReference && !IsPointer) ||
8027              (IsMemberReference && Next != CE &&
8028               !Next->getAssociatedExpression()->getType()->isPointerType()));
8029         if (!OverlappedElements.empty() && Next == CE) {
8030           // Handle base element with the info for overlapped elements.
8031           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8032           assert(!IsPointer &&
8033                  "Unexpected base element with the pointer type.");
8034           // Mark the whole struct as the struct that requires allocation on the
8035           // device.
8036           PartialStruct.LowestElem = {0, LowestElem};
8037           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8038               I->getAssociatedExpression()->getType());
8039           Address HB = CGF.Builder.CreateConstGEP(
8040               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8041                                                               CGF.VoidPtrTy),
8042               TypeSize.getQuantity() - 1);
8043           PartialStruct.HighestElem = {
8044               std::numeric_limits<decltype(
8045                   PartialStruct.HighestElem.first)>::max(),
8046               HB};
8047           PartialStruct.Base = BP;
8048           PartialStruct.LB = LB;
8049           assert(
8050               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8051               "Overlapped elements must be used only once for the variable.");
8052           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8053           // Emit data for non-overlapped data.
8054           OpenMPOffloadMappingFlags Flags =
8055               OMP_MAP_MEMBER_OF |
8056               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8057                              /*AddPtrFlag=*/false,
8058                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8059           llvm::Value *Size = nullptr;
8060           // Do bitcopy of all non-overlapped structure elements.
8061           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8062                    Component : OverlappedElements) {
8063             Address ComponentLB = Address::invalid();
8064             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8065                  Component) {
8066               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8067                 const auto *FD = dyn_cast<FieldDecl>(VD);
8068                 if (FD && FD->getType()->isLValueReferenceType()) {
8069                   const auto *ME =
8070                       cast<MemberExpr>(MC.getAssociatedExpression());
8071                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8072                   ComponentLB =
8073                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8074                           .getAddress(CGF);
8075                 } else {
8076                   ComponentLB =
8077                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8078                           .getAddress(CGF);
8079                 }
8080                 Size = CGF.Builder.CreatePtrDiff(
8081                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8082                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8083                 break;
8084               }
8085             }
8086             assert(Size && "Failed to determine structure size");
8087             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8088             CombinedInfo.BasePointers.push_back(BP.getPointer());
8089             CombinedInfo.Pointers.push_back(LB.getPointer());
8090             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8091                 Size, CGF.Int64Ty, /*isSigned=*/true));
8092             CombinedInfo.Types.push_back(Flags);
8093             CombinedInfo.Mappers.push_back(nullptr);
8094             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8095                                                                       : 1);
8096             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8097           }
8098           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8099           CombinedInfo.BasePointers.push_back(BP.getPointer());
8100           CombinedInfo.Pointers.push_back(LB.getPointer());
8101           Size = CGF.Builder.CreatePtrDiff(
8102               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8103               CGF.EmitCastToVoidPtr(LB.getPointer()));
8104           CombinedInfo.Sizes.push_back(
8105               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8106           CombinedInfo.Types.push_back(Flags);
8107           CombinedInfo.Mappers.push_back(nullptr);
8108           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8109                                                                     : 1);
8110           break;
8111         }
8112         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8113         if (!IsMemberPointerOrAddr ||
8114             (Next == CE && MapType != OMPC_MAP_unknown)) {
8115           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8116           CombinedInfo.BasePointers.push_back(BP.getPointer());
8117           CombinedInfo.Pointers.push_back(LB.getPointer());
8118           CombinedInfo.Sizes.push_back(
8119               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8120           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8121                                                                     : 1);
8122 
8123           // If Mapper is valid, the last component inherits the mapper.
8124           bool HasMapper = Mapper && Next == CE;
8125           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8126 
8127           // We need to add a pointer flag for each map that comes from the
8128           // same expression except for the first one. We also need to signal
8129           // this map is the first one that relates with the current capture
8130           // (there is a set of entries for each capture).
8131           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8132               MapType, MapModifiers, MotionModifiers, IsImplicit,
8133               !IsExpressionFirstInfo || RequiresReference ||
8134                   FirstPointerInComplexData || IsMemberReference,
8135               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8136 
8137           if (!IsExpressionFirstInfo || IsMemberReference) {
8138             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8139             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8140             if (IsPointer || (IsMemberReference && Next != CE))
8141               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8142                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8143 
8144             if (ShouldBeMemberOf) {
8145               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8146               // should be later updated with the correct value of MEMBER_OF.
8147               Flags |= OMP_MAP_MEMBER_OF;
8148               // From now on, all subsequent PTR_AND_OBJ entries should not be
8149               // marked as MEMBER_OF.
8150               ShouldBeMemberOf = false;
8151             }
8152           }
8153 
8154           CombinedInfo.Types.push_back(Flags);
8155         }
8156 
8157         // If we have encountered a member expression so far, keep track of the
8158         // mapped member. If the parent is "*this", then the value declaration
8159         // is nullptr.
8160         if (EncounteredME) {
8161           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8162           unsigned FieldIndex = FD->getFieldIndex();
8163 
8164           // Update info about the lowest and highest elements for this struct
8165           if (!PartialStruct.Base.isValid()) {
8166             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8167             if (IsFinalArraySection) {
8168               Address HB =
8169                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8170                       .getAddress(CGF);
8171               PartialStruct.HighestElem = {FieldIndex, HB};
8172             } else {
8173               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8174             }
8175             PartialStruct.Base = BP;
8176             PartialStruct.LB = BP;
8177           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8178             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8179           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8180             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8181           }
8182         }
8183 
8184         // Need to emit combined struct for array sections.
8185         if (IsFinalArraySection || IsNonContiguous)
8186           PartialStruct.IsArraySection = true;
8187 
8188         // If we have a final array section, we are done with this expression.
8189         if (IsFinalArraySection)
8190           break;
8191 
8192         // The pointer becomes the base for the next element.
8193         if (Next != CE)
8194           BP = IsMemberReference ? LowestElem : LB;
8195 
8196         IsExpressionFirstInfo = false;
8197         IsCaptureFirstInfo = false;
8198         FirstPointerInComplexData = false;
8199         IsPrevMemberReference = IsMemberReference;
8200       } else if (FirstPointerInComplexData) {
8201         QualType Ty = Components.rbegin()
8202                           ->getAssociatedDeclaration()
8203                           ->getType()
8204                           .getNonReferenceType();
8205         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8206         FirstPointerInComplexData = false;
8207       }
8208     }
8209     // If ran into the whole component - allocate the space for the whole
8210     // record.
8211     if (!EncounteredME)
8212       PartialStruct.HasCompleteRecord = true;
8213 
8214     if (!IsNonContiguous)
8215       return;
8216 
8217     const ASTContext &Context = CGF.getContext();
8218 
8219     // For supporting stride in array section, we need to initialize the first
8220     // dimension size as 1, first offset as 0, and first count as 1
8221     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8222     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8223     MapValuesArrayTy CurStrides;
8224     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8225     uint64_t ElementTypeSize;
8226 
8227     // Collect Size information for each dimension and get the element size as
8228     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8229     // should be [10, 10] and the first stride is 4 btyes.
8230     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8231          Components) {
8232       const Expr *AssocExpr = Component.getAssociatedExpression();
8233       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8234 
8235       if (!OASE)
8236         continue;
8237 
8238       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8239       auto *CAT = Context.getAsConstantArrayType(Ty);
8240       auto *VAT = Context.getAsVariableArrayType(Ty);
8241 
8242       // We need all the dimension size except for the last dimension.
8243       assert((VAT || CAT || &Component == &*Components.begin()) &&
8244              "Should be either ConstantArray or VariableArray if not the "
8245              "first Component");
8246 
8247       // Get element size if CurStrides is empty.
8248       if (CurStrides.empty()) {
8249         const Type *ElementType = nullptr;
8250         if (CAT)
8251           ElementType = CAT->getElementType().getTypePtr();
8252         else if (VAT)
8253           ElementType = VAT->getElementType().getTypePtr();
8254         else
8255           assert(&Component == &*Components.begin() &&
8256                  "Only expect pointer (non CAT or VAT) when this is the "
8257                  "first Component");
8258         // If ElementType is null, then it means the base is a pointer
8259         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8260         // for next iteration.
8261         if (ElementType) {
8262           // For the case that having pointer as base, we need to remove one
8263           // level of indirection.
8264           if (&Component != &*Components.begin())
8265             ElementType = ElementType->getPointeeOrArrayElementType();
8266           ElementTypeSize =
8267               Context.getTypeSizeInChars(ElementType).getQuantity();
8268           CurStrides.push_back(
8269               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8270         }
8271       }
8272       // Get dimension value except for the last dimension since we don't need
8273       // it.
8274       if (DimSizes.size() < Components.size() - 1) {
8275         if (CAT)
8276           DimSizes.push_back(llvm::ConstantInt::get(
8277               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8278         else if (VAT)
8279           DimSizes.push_back(CGF.Builder.CreateIntCast(
8280               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8281               /*IsSigned=*/false));
8282       }
8283     }
8284 
8285     // Skip the dummy dimension since we have already have its information.
8286     auto DI = DimSizes.begin() + 1;
8287     // Product of dimension.
8288     llvm::Value *DimProd =
8289         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8290 
8291     // Collect info for non-contiguous. Notice that offset, count, and stride
8292     // are only meaningful for array-section, so we insert a null for anything
8293     // other than array-section.
8294     // Also, the size of offset, count, and stride are not the same as
8295     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8296     // count, and stride are the same as the number of non-contiguous
8297     // declaration in target update to/from clause.
8298     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8299          Components) {
8300       const Expr *AssocExpr = Component.getAssociatedExpression();
8301 
8302       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8303         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8304             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8305             /*isSigned=*/false);
8306         CurOffsets.push_back(Offset);
8307         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8308         CurStrides.push_back(CurStrides.back());
8309         continue;
8310       }
8311 
8312       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8313 
8314       if (!OASE)
8315         continue;
8316 
8317       // Offset
8318       const Expr *OffsetExpr = OASE->getLowerBound();
8319       llvm::Value *Offset = nullptr;
8320       if (!OffsetExpr) {
8321         // If offset is absent, then we just set it to zero.
8322         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8323       } else {
8324         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8325                                            CGF.Int64Ty,
8326                                            /*isSigned=*/false);
8327       }
8328       CurOffsets.push_back(Offset);
8329 
8330       // Count
8331       const Expr *CountExpr = OASE->getLength();
8332       llvm::Value *Count = nullptr;
8333       if (!CountExpr) {
8334         // In Clang, once a high dimension is an array section, we construct all
8335         // the lower dimension as array section, however, for case like
8336         // arr[0:2][2], Clang construct the inner dimension as an array section
8337         // but it actually is not in an array section form according to spec.
8338         if (!OASE->getColonLocFirst().isValid() &&
8339             !OASE->getColonLocSecond().isValid()) {
8340           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8341         } else {
8342           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8343           // When the length is absent it defaults to ⌈(size −
8344           // lower-bound)/stride⌉, where size is the size of the array
8345           // dimension.
8346           const Expr *StrideExpr = OASE->getStride();
8347           llvm::Value *Stride =
8348               StrideExpr
8349                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8350                                               CGF.Int64Ty, /*isSigned=*/false)
8351                   : nullptr;
8352           if (Stride)
8353             Count = CGF.Builder.CreateUDiv(
8354                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8355           else
8356             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8357         }
8358       } else {
8359         Count = CGF.EmitScalarExpr(CountExpr);
8360       }
8361       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8362       CurCounts.push_back(Count);
8363 
8364       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8365       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8366       //              Offset      Count     Stride
8367       //    D0          0           1         4    (int)    <- dummy dimension
8368       //    D1          0           2         8    (2 * (1) * 4)
8369       //    D2          1           2         20   (1 * (1 * 5) * 4)
8370       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8371       const Expr *StrideExpr = OASE->getStride();
8372       llvm::Value *Stride =
8373           StrideExpr
8374               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8375                                           CGF.Int64Ty, /*isSigned=*/false)
8376               : nullptr;
8377       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8378       if (Stride)
8379         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8380       else
8381         CurStrides.push_back(DimProd);
8382       if (DI != DimSizes.end())
8383         ++DI;
8384     }
8385 
8386     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8387     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8388     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8389   }
8390 
8391   /// Return the adjusted map modifiers if the declaration a capture refers to
8392   /// appears in a first-private clause. This is expected to be used only with
8393   /// directives that start with 'target'.
8394   MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture & Cap) const8395   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8396     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8397 
8398     // A first private variable captured by reference will use only the
8399     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8400     // declaration is known as first-private in this handler.
8401     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8402       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8403         return MappableExprsHandler::OMP_MAP_TO |
8404                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8405       return MappableExprsHandler::OMP_MAP_PRIVATE |
8406              MappableExprsHandler::OMP_MAP_TO;
8407     }
8408     return MappableExprsHandler::OMP_MAP_TO |
8409            MappableExprsHandler::OMP_MAP_FROM;
8410   }
8411 
getMemberOfFlag(unsigned Position)8412   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8413     // Rotate by getFlagMemberOffset() bits.
8414     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8415                                                   << getFlagMemberOffset());
8416   }
8417 
setCorrectMemberOfFlag(OpenMPOffloadMappingFlags & Flags,OpenMPOffloadMappingFlags MemberOfFlag)8418   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8419                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8420     // If the entry is PTR_AND_OBJ but has not been marked with the special
8421     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8422     // marked as MEMBER_OF.
8423     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8424         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8425       return;
8426 
8427     // Reset the placeholder value to prepare the flag for the assignment of the
8428     // proper MEMBER_OF value.
8429     Flags &= ~OMP_MAP_MEMBER_OF;
8430     Flags |= MemberOfFlag;
8431   }
8432 
  /// Flatten the record layout of \p RD into \p Layout as an ordered list of
  /// non-bitfield, non-zero-size fields, recursing into non-empty base
  /// classes in LLVM struct-layout order. If \p AsBase is true, \p RD is laid
  /// out as a base subobject.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    // Use the base-subobject LLVM type when RD is being laid out as a base.
    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // or a field, placed at its LLVM field index. Unfilled slots stay null.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the first occupant if this slot was already claimed above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the flattened layout in slot order, recursing into base classes.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8492 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array. Declarations in \p SkipVarSet are
  /// ignored entirely.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Each declaration gets one bucket per MapKind; buckets are later emitted
    // in enum order: 'present' entries first, then 'alloc', then the rest.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          // Lazily create the per-declaration bucket array on first use.
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Bucket the component lists of all 'map' clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // Only pass the variable reference expression for clauses that have a
        // valid source location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Bucket the component lists of all 'to' clauses; these are recorded with
    // map type 'to' and carry motion modifiers instead of map modifiers.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Bucket the component lists of all 'from' clauses, analogously to 'to'.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          // Emit a standalone zero-size RETURN_PARAM entry for the pointer.
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once across all use_device_addr
        // clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          // Unlike use_device_ptr, the address itself is wanted: take the
          // lvalue address when available, otherwise the scalar value.
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected information: one chunk per declaration, buckets in
    // MapKind order.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8846 
8847 public:
  /// Constructor for executable directives. Pre-collects from the directive's
  /// clauses the firstprivate declarations (explicit ones, plus implicit ones
  /// coming from uses_allocators) and the is_device_ptr component lists that
  /// the map-generation routines consult later.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          // Allocator-traits variables are treated as implicit firstprivates.
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          // So are allocators that are themselves variables.
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }
8873 
  /// Constructor for the declare mapper directive. No clause preprocessing is
  /// done here; the mapper's clauses are walked on demand by
  /// generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8877 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// The combined entry covers the byte range [lowest mapped element,
  /// one-past-highest mapped element) of the struct; all entries in
  /// \p CurTypes are then marked MEMBER_OF this new entry.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not already MEMBER_OF something and is not an
    // array section needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, both bounds collapse to the record
    // base: the "+1" GEP below then advances by one full record, so the size
    // computation covers the entire struct.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    // Cast both bounds to i8*-like pointers so the pointer difference is in
    // bytes, then widen to the runtime's 64-bit size type.
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8935 
8936   /// Generate all the base pointers, section pointers, sizes, map types, and
8937   /// mappers for the extracted mappable expressions (all included in \a
8938   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8939   /// pair of the relevant declaration and index where it occurs is appended to
8940   /// the device pointers info array.
generateAllInfo(MapCombinedInfoTy & CombinedInfo,const llvm::DenseSet<CanonicalDeclPtr<const Decl>> & SkipVarSet=llvm::DenseSet<CanonicalDeclPtr<const Decl>> ()) const8941   void generateAllInfo(
8942       MapCombinedInfoTy &CombinedInfo,
8943       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8944           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8945     assert(CurDir.is<const OMPExecutableDirective *>() &&
8946            "Expect a executable directive");
8947     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8948     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8949   }
8950 
8951   /// Generate all the base pointers, section pointers, sizes, map types, and
8952   /// mappers for the extracted map clauses of user-defined mapper (all included
8953   /// in \a CombinedInfo).
generateAllInfoForMapper(MapCombinedInfoTy & CombinedInfo) const8954   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8955     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8956            "Expect a declare mapper directive");
8957     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8958     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8959   }
8960 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is (a reference to) a lambda closure object, emit one
  /// PTR_AND_OBJ map entry for the captured 'this' (if any) and one per
  /// by-reference or pointer-typed capture. Each emitted field address is
  /// recorded in \p LambdaPointers, keyed back to the lambda's own address,
  /// so adjustMemberOfForLambdaCaptures() can later patch the MEMBER_OF
  /// index of these entries.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured value really is a lambda closure.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Remember field-address -> lambda-address for the later MEMBER_OF fixup.
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      // 'this' is a pointer, so the entry is sizeof(void*).
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD deliberately shadows the parameter; below it refers to
      // the captured variable, not the lambda object.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and pointer-typed by-copy captures need a
      // device-side pointer fixup.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by copy: load its value and emit a zero-size entry
        // (only the pointer itself needs translating).
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9027 
9028   /// Set correct indices for lambdas captures.
adjustMemberOfForLambdaCaptures(const llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapFlagsArrayTy & Types) const9029   void adjustMemberOfForLambdaCaptures(
9030       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9031       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9032       MapFlagsArrayTy &Types) const {
9033     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9034       // Set correct member_of idx for all implicit lambda captures.
9035       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9036                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9037         continue;
9038       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9039       assert(BasePtr && "Unable to find base lambda address.");
9040       int TgtIdx = -1;
9041       for (unsigned J = I; J > 0; --J) {
9042         unsigned Idx = J - 1;
9043         if (Pointers[Idx] != BasePtr)
9044           continue;
9045         TgtIdx = Idx;
9046         break;
9047       }
9048       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9049       // All other current entries will be MEMBER_OF the combined entry
9050       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9051       // 0xFFFF in the MEMBER_OF field).
9052       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9053       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9054     }
9055   }
9056 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// Collects every map-clause component list that mentions the captured
  /// declaration, orders them ('present' first, 'alloc' last), detects
  /// overlapping component lists (e.g. a struct and one of its members mapped
  /// separately), and then emits map info for overlapped and non-overlapped
  /// lists via generateInfoForComponentList().
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // The entry covers only the pointer itself: sizeof(void*).
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // A MapData tuple is: (component list, map type, map-type modifiers,
    // is-implicit, mapper, var-ref expression).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Gather every map-clause component list referring to this declaration.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists: entries with the 'present' modifier first, 'alloc'
    // maps last. NOTE: the comparator deliberately reads MapType from the
    // *other* tuple -- HasAllocs refers to RHS and HasAllocsR to LHS -- so
    // returning true when "RHS is alloc and LHS is not" sorts allocs to the
    // end.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare each pair of lists once (L against every later L1), walking
    // both component lists from their tails.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Advance while both lists agree on expression kind and declaration.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The longer list is the "base"; the shorter one overlaps it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers to reach the record type whose field order
      // defines the layout used for sorting below.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each overlap set by declaration order of the differing fields
    // (falling back to the flattened Layout when the fields have different
    // parents); shorter lists (closer to the base) come first.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Lists already emitted in the overlapped pass above are skipped here.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9294 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Emits exactly one entry into \a CombinedInfo. The map type depends on
  /// the capture kind ('this', by-copy, or by-reference); the entry is always
  /// flagged TARGET_PARAM, and IMPLICIT unless the variable appears in an
  /// explicit firstprivate clause.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object with the size of the class.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An explicit firstprivate clause overrides the implicit flag.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      // For a firstprivate pointer passed by reference, map the pointer's
      // current value rather than the reference slot.
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9367 };
9368 } // anonymous namespace
9369 
/// For every non-contiguous map entry, emit an on-stack array of
/// 'descriptor_dim' records ({offset, count, stride} per dimension) and store
/// its address into the corresponding slot of the offloading pointers array
/// (\p Info.PointersArray).
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  // Three identical uint64_t fields: offset, count, stride (in that order,
  // matching the OffsetFD/CountFD/StrideFD indices below).
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Dimensions are stored in reverse: slot II receives the data recorded
      // for dimension EE-II-1.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for entries that actually got a descriptor.
    ++L;
  }
}
9437 
9438 /// Emit a string constant containing the names of the values mapped to the
9439 /// offloading runtime library.
9440 llvm::Constant *
emitMappingInformation(CodeGenFunction & CGF,llvm::OpenMPIRBuilder & OMPBuilder,MappableExprsHandler::MappingExprInfo & MapExprs)9441 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9442                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9443   llvm::Constant *SrcLocStr;
9444   if (!MapExprs.getMapDecl()) {
9445     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9446   } else {
9447     std::string ExprName = "";
9448     if (MapExprs.getMapExpr()) {
9449       PrintingPolicy P(CGF.getContext().getLangOpts());
9450       llvm::raw_string_ostream OS(ExprName);
9451       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9452       OS.flush();
9453     } else {
9454       ExprName = MapExprs.getMapDecl()->getNameAsString();
9455     }
9456 
9457     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9458     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9459     const char *FileName = PLoc.getFilename();
9460     unsigned Line = PLoc.getLine();
9461     unsigned Column = PLoc.getColumn();
9462     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9463                                                 Line, Column);
9464   }
9465   return SrcLocStr;
9466 }
9467 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CGF Code generation state for the current function.
/// \param CombinedInfo Base pointers, pointers, sizes, map types, expression
///        names and mappers collected for every mapped value.
/// \param Info [out] Receives the addresses of the emitted arrays
///        (.offload_baseptrs/_ptrs/_sizes/_mappers and the maptype/mapname
///        globals).
/// \param OMPBuilder Builder used to emit the constant maptype/mapname
///        globals.
/// \param IsNonContiguous If true, also emit the non-contiguous descriptor
///        (offset/count/stride dimensions) when offsets are present.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers, and mappers are filled at runtime in the loop
    // below, so they live in function-local temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For non-contiguous entries the "size" slot carries the number of
          // dimensions rather than a byte count.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: pass a null i8* instead of a names array.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, (runtime) size and mapper arrays one
    // element at a time.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address of a captured device pointer so it can be
      // looked up later via CaptureDeviceAddrMap.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored at runtime when at least one size is not a
      // compile-time constant; otherwise the constant global above is used.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The non-contiguous descriptor is only needed when explicitly requested
  // and when there is actual offset information to describe.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9642 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// If true, emit the map-type array prepared for the end of the region
  /// (Info.MapTypesArrayEnd, when it exists) instead of the one for the
  /// beginning of the region.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  /// Allow direct construction from the ForEndCall flag.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9651 
9652 /// Emit the arguments to be passed to the runtime library based on the
9653 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9654 /// ForEndCall, emit map types to be passed for the end of the region instead of
9655 /// the beginning.
emitOffloadingArraysArgument(CodeGenFunction & CGF,llvm::Value * & BasePointersArrayArg,llvm::Value * & PointersArrayArg,llvm::Value * & SizesArrayArg,llvm::Value * & MapTypesArrayArg,llvm::Value * & MapNamesArrayArg,llvm::Value * & MappersArrayArg,CGOpenMPRuntime::TargetDataInfo & Info,const ArgumentsOptions & Options=ArgumentsOptions ())9656 static void emitOffloadingArraysArgument(
9657     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9658     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9659     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9660     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9661     const ArgumentsOptions &Options = ArgumentsOptions()) {
9662   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9663          "expected region end call to runtime only when end call is separate");
9664   CodeGenModule &CGM = CGF.CGM;
9665   if (Info.NumberOfPtrs) {
9666     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9667         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9668         Info.BasePointersArray,
9669         /*Idx0=*/0, /*Idx1=*/0);
9670     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9671         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9672         Info.PointersArray,
9673         /*Idx0=*/0,
9674         /*Idx1=*/0);
9675     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9676         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9677         /*Idx0=*/0, /*Idx1=*/0);
9678     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9679         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9680         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9681                                                     : Info.MapTypesArray,
9682         /*Idx0=*/0,
9683         /*Idx1=*/0);
9684 
9685     // Only emit the mapper information arrays if debug information is
9686     // requested.
9687     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9688       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9689     else
9690       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9691           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9692           Info.MapNamesArray,
9693           /*Idx0=*/0,
9694           /*Idx1=*/0);
9695     // If there is no user-defined mapper, set the mapper array to nullptr to
9696     // avoid an unnecessary data privatization
9697     if (!Info.HasMapper)
9698       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9699     else
9700       MappersArrayArg =
9701           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9702   } else {
9703     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9704     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9705     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9706     MapTypesArrayArg =
9707         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9708     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9709     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9710   }
9711 }
9712 
/// Check for inner distribute directive.
///
/// Inspects the innermost captured statement of \p D and returns a nested
/// 'distribute' directive when one exists: directly nested under
/// 'target'/'target teams', or one level deeper inside a 'teams' directive
/// nested under 'target'. Returns nullptr when no such directive is found.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain a 'distribute' directly ...
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // ... or a 'teams' region whose single child is a 'distribute'.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // These target forms cannot legally contain a nested 'distribute'.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Any other kind of enclosing directive is a caller error; the cases are
    // enumerated so new directive kinds trigger a compiler warning here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9821 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D The 'declare mapper' declaration to emit a function for.
/// \param CGF If non-null, the declaration is additionally recorded in
///        FunctionUDMMap against CGF's current function.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper function is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function "<prefix>omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer: PtrBegin on entry, PtrNext on each
  // back edge (added at the bottom of the loop).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count into the MEMBER_OF bit-field position of the
  // map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Component names are only emitted when debug information is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible map types; tofrom keeps MemberMapType as-is
    // (incoming from ToElseBB through the IsFrom fall-through edge).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the mapper function and, when requested from a specific function,
  // record the dependency for that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10102 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Only sections with more than one element are handled as arrays here.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit from the map type.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization runs only when deletion is NOT requested.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion runs only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10171 
getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl * D)10172 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10173     const OMPDeclareMapperDecl *D) {
10174   auto I = UDMMap.find(D);
10175   if (I != UDMMap.end())
10176     return I->second;
10177   emitUserDefinedMapper(D);
10178   return UDMMap.lookup(D);
10179 }
10180 
emitTargetNumIterationsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Value * DeviceID,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)10181 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10182     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10183     llvm::Value *DeviceID,
10184     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10185                                      const OMPLoopDirective &D)>
10186         SizeEmitter) {
10187   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10188   const OMPExecutableDirective *TD = &D;
10189   // Get nested teams distribute kind directive, if any.
10190   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10191     TD = getNestedDistributeDirective(CGM.getContext(), D);
10192   if (!TD)
10193     return;
10194   const auto *LD = cast<OMPLoopDirective>(TD);
10195   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10196                                                          PrePostActionTy &) {
10197     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10198       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10199       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10200       CGF.EmitRuntimeCall(
10201           OMPBuilder.getOrCreateRuntimeFunction(
10202               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10203           Args);
10204     }
10205   };
10206   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10207 }
10208 
/// Emit the host-side launch sequence for a target directive: collect the
/// captured variables, build the offloading argument arrays, invoke the
/// appropriate __tgt_target* runtime entry point, and fall back to the
/// host-outlined function when no device binary exists, when reverse
/// offloading is requested, or when the offload attempt fails at runtime.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require the region to be wrapped in an outer task,
  // which means captured variables must be re-generated inside that task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray/MapNamesArray are filled by TargetThenGen below
  // and consumed by ThenGen; they are captured by reference for that reason.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types,
  // names, mappers) for every capture and map clause, then dispatches to
  // ThenGen either directly or through a task-based directive.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the array locations for ThenGen (captured by reference above).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10536 
/// Recursively scan \p S for target execution directives and emit the device
/// function for each one found, using \p ParentName in the unique kernel
/// mangling. Non-target OpenMP directives are recursed into via their raw
/// associated statement; lambdas are recursed into via their body.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Device/file/line triple uniquely identifies this target region entry.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each target
    // directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach this switch; keep them listed so the switch stays
    // exhaustive when new kinds are added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Recurse into the raw statement to find nested target regions.
    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10686 
isAssumedToBeNotEmitted(const ValueDecl * VD,bool IsDevice)10687 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10688   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10689       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10690   if (!DevTy)
10691     return false;
10692   // Do not emit device_type(nohost) functions for the host.
10693   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10694     return true;
10695   // Do not emit device_type(host) functions for the device.
10696   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10697     return true;
10698   return false;
10699 }
10700 
/// Decide whether the regular codegen path must be suppressed for this
/// function because of OpenMP offloading rules. Returns true when the caller
/// should NOT emit the function through normal code generation.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation. The only exception is a function restricted
  // by device_type to the device (nohost), which must be skipped on the host.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  // Device compilation from here on.
  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function body so their outlined
  // device kernels get registered/emitted even when the enclosing function
  // itself is not emitted for the device.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    // device_type(host) functions are never emitted for the device.
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target and has
  // not already been emitted as part of target codegen.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
10726 
/// Decide whether the regular codegen path must be suppressed for this global
/// variable because of OpenMP offloading rules. Returns true when the caller
/// should NOT emit the variable through normal code generation.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Variables restricted by device_type to the other compilation side are
  // never emitted here.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host there is nothing more to check - emit normally.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target. Variables
  // with a 'link' clause, or a 'to' clause under unified shared memory, are
  // deferred: they are emitted later through their reference/pointer proxy
  // (see emitDeferredTargetDecls).
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10764 
/// Register \p VD (already emitted at \p Addr) with the offload entries
/// manager so the device and host tables line up, and create any auxiliary
/// globals required by declare target semantics.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register unless we are offloading (device side or host with
  // target triples).
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'to' without unified shared memory: register the variable itself.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only: size 0 marks the entry as not defined here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant pointer to the variable and keep it alive
        // via llvm.compiler.used so the optimizer cannot drop it.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link', or 'to' under unified shared memory: register the pointer proxy
    // instead of the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device, the incoming Addr already is the proxy; record only its
      // name.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host, materialize (or look up) the proxy variable.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10846 
emitTargetGlobal(GlobalDecl GD)10847 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10848   if (isa<FunctionDecl>(GD.getDecl()) ||
10849       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10850     return emitTargetFunctions(GD);
10851 
10852   return emitTargetGlobalVariable(GD);
10853 }
10854 
emitDeferredTargetDecls() const10855 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10856   for (const VarDecl *VD : DeferredGlobalVariables) {
10857     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10858         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10859     if (!Res)
10860       continue;
10861     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10862         !HasRequiresUnifiedSharedMemory) {
10863       CGM.EmitGlobal(VD);
10864     } else {
10865       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10866               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10867                HasRequiresUnifiedSharedMemory)) &&
10868              "Expected link clause or to clause with unified memory.");
10869       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10870     }
10871   }
10872 }
10873 
/// Base implementation: only validates that \p D really is a target-based
/// directive; no lambda-specific adjustment is performed here.
// NOTE(review): the stray leading space in the assert message (" Expected")
// is preserved as-is.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10879 
processRequiresDirective(const OMPRequiresDecl * D)10880 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10881   for (const OMPClause *Clause : D->clauselists()) {
10882     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10883       HasRequiresUnifiedSharedMemory = true;
10884     } else if (const auto *AC =
10885                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10886       switch (AC->getAtomicDefaultMemOrderKind()) {
10887       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10888         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10889         break;
10890       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10891         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10892         break;
10893       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10894         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10895         break;
10896       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10897         break;
10898       }
10899     }
10900   }
10901 }
10902 
/// Returns the default memory ordering established by a 'requires
/// atomic_default_mem_order' directive (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10906 
hasAllocateAttributeForGlobalVar(const VarDecl * VD,LangAS & AS)10907 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10908                                                        LangAS &AS) {
10909   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10910     return false;
10911   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10912   switch(A->getAllocatorType()) {
10913   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10914   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10915   // Not supported, fallback to the default mem space.
10916   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10917   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10918   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10919   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10920   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10921   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10922   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10923     AS = LangAS::Default;
10924     return true;
10925   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10926     llvm_unreachable("Expected predefined allocator for the variables with the "
10927                      "static storage.");
10928   }
10929   return false;
10930 }
10931 
/// Returns true if a 'requires unified_shared_memory' directive was seen
/// (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10935 
DisableAutoDeclareTargetRAII(CodeGenModule & CGM)10936 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10937     CodeGenModule &CGM)
10938     : CGM(CGM) {
10939   if (CGM.getLangOpts().OpenMPIsDevice) {
10940     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10941     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10942   }
10943 }
10944 
/// Restore the ShouldMarkAsGlobal flag saved by the constructor (device
/// compilations only; the constructor never saved anything on the host).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10949 
/// Track device-side emission of \p GD. Returns true when the function was
/// already emitted (or must not be auto-marked), false when the caller should
/// proceed to emit it.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Only relevant when compiling for the device with auto-marking enabled
  // (see DisableAutoDeclareTargetRAII).
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If the IR function exists, a definition means it is fully emitted; a
      // mere declaration means emission is still pending.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First insertion returns true -> not yet emitted -> report false so the
  // caller emits it; subsequent calls report true.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10969 
/// Create the host-side constructor-like function that registers the
/// 'requires' clauses with the offload runtime via __tgt_register_requires.
/// Returns nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the call: __tgt_register_requires(<flags>).
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11011 
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)11012 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11013                                     const OMPExecutableDirective &D,
11014                                     SourceLocation Loc,
11015                                     llvm::Function *OutlinedFn,
11016                                     ArrayRef<llvm::Value *> CapturedVars) {
11017   if (!CGF.HaveInsertPoint())
11018     return;
11019 
11020   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11021   CodeGenFunction::RunCleanupsScope Scope(CGF);
11022 
11023   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11024   llvm::Value *Args[] = {
11025       RTLoc,
11026       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11027       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11028   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11029   RealArgs.append(std::begin(Args), std::end(Args));
11030   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11031 
11032   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11033       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11034   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11035 }
11036 
/// Emit a call to __kmpc_push_num_teams with the evaluated 'num_teams' and
/// 'thread_limit' clause values; 0 is passed for an absent clause.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Absent clause -> 0, which tells the runtime to use its default.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
11065 
/// Emit the runtime calls for a 'target data' region:
/// __tgt_target_data_begin_mapper, the region body, then
/// __tgt_target_data_end_mapper, honoring the 'if' and 'device' clauses.
/// The body is emitted twice (with and without privatization) when device
/// pointer privatization is required.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; otherwise use the "undefined device" sentinel.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    //
    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // Re-derive the array arguments from Info (populated by BeginThenGen);
    // ForEndCall selects the end-of-region variants where they differ.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any; otherwise use the "undefined device" sentinel.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11219 
/// Emit the runtime call for a standalone data directive ('target enter
/// data', 'target exit data', or 'target update'), honoring the 'if',
/// 'device', 'depend', and 'nowait' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Captured by reference in the lambdas below: TargetThenGen fills them in
  // before ThenGen consumes them.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise use the "undefined device" sentinel.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive ('nowait' selects the asynchronous variant). The exhaustive
    // case list keeps the switch warning-clean when new directives are added.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the map clauses, publish them through
  // InputInfo/MapTypesArray/MapNamesArray, then run ThenGen either inside an
  // outer task (for depend/nowait) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // depend/nowait require wrapping the runtime call in an outer task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An 'if' clause that evaluates to false makes the directive a no-op.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11398 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Parameter classification; Vector unless a clause says otherwise.
    ParamKindTy Kind = Vector;
    /// Stride (for linear parameters) or clause argument value.
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause, if any.
    llvm::APSInt Alignment;
  };
} // namespace
11409 
evaluateCDTSize(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)11410 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11411                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11412   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11413   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11414   // of that clause. The VLEN value must be power of 2.
11415   // In other case the notion of the function`s "characteristic data type" (CDT)
11416   // is used to compute the vector length.
11417   // CDT is defined in the following order:
11418   //   a) For non-void function, the CDT is the return type.
11419   //   b) If the function has any non-uniform, non-linear parameters, then the
11420   //   CDT is the type of the first such parameter.
11421   //   c) If the CDT determined by a) or b) above is struct, union, or class
11422   //   type which is pass-by-value (except for the type that maps to the
11423   //   built-in complex data type), the characteristic data type is int.
11424   //   d) If none of the above three cases is applicable, the CDT is int.
11425   // The VLEN is then determined based on the CDT and the size of vector
11426   // register of that ISA for which current vector version is generated. The
11427   // VLEN is computed using the formula below:
11428   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11429   // where vector register size specified in section 3.2.1 Registers and the
11430   // Stack Frame of original AMD64 ABI document.
11431   QualType RetType = FD->getReturnType();
11432   if (RetType.isNull())
11433     return 0;
11434   ASTContext &C = FD->getASTContext();
11435   QualType CDT;
11436   if (!RetType.isNull() && !RetType->isVoidType()) {
11437     CDT = RetType;
11438   } else {
11439     unsigned Offset = 0;
11440     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11441       if (ParamAttrs[Offset].Kind == Vector)
11442         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11443       ++Offset;
11444     }
11445     if (CDT.isNull()) {
11446       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11447         if (ParamAttrs[I + Offset].Kind == Vector) {
11448           CDT = FD->getParamDecl(I)->getType();
11449           break;
11450         }
11451       }
11452     }
11453   }
11454   if (CDT.isNull())
11455     CDT = C.IntTy;
11456   CDT = CDT->getCanonicalTypeUnqualified();
11457   if (CDT->isRecordType() || CDT->isUnionType())
11458     CDT = C.IntTy;
11459   return C.getTypeSize(CDT);
11460 }
11461 
11462 static void
emitX86DeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn,const llvm::APSInt & VLENVal,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State)11463 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11464                            const llvm::APSInt &VLENVal,
11465                            ArrayRef<ParamAttrTy> ParamAttrs,
11466                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11467   struct ISADataTy {
11468     char ISA;
11469     unsigned VecRegSize;
11470   };
11471   ISADataTy ISAData[] = {
11472       {
11473           'b', 128
11474       }, // SSE
11475       {
11476           'c', 256
11477       }, // AVX
11478       {
11479           'd', 256
11480       }, // AVX2
11481       {
11482           'e', 512
11483       }, // AVX512
11484   };
11485   llvm::SmallVector<char, 2> Masked;
11486   switch (State) {
11487   case OMPDeclareSimdDeclAttr::BS_Undefined:
11488     Masked.push_back('N');
11489     Masked.push_back('M');
11490     break;
11491   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11492     Masked.push_back('N');
11493     break;
11494   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11495     Masked.push_back('M');
11496     break;
11497   }
11498   for (char Mask : Masked) {
11499     for (const ISADataTy &Data : ISAData) {
11500       SmallString<256> Buffer;
11501       llvm::raw_svector_ostream Out(Buffer);
11502       Out << "_ZGV" << Data.ISA << Mask;
11503       if (!VLENVal) {
11504         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11505         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11506         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11507       } else {
11508         Out << VLENVal;
11509       }
11510       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11511         switch (ParamAttr.Kind){
11512         case LinearWithVarStride:
11513           Out << 's' << ParamAttr.StrideOrArg;
11514           break;
11515         case Linear:
11516           Out << 'l';
11517           if (ParamAttr.StrideOrArg != 1)
11518             Out << ParamAttr.StrideOrArg;
11519           break;
11520         case Uniform:
11521           Out << 'u';
11522           break;
11523         case Vector:
11524           Out << 'v';
11525           break;
11526         }
11527         if (!!ParamAttr.Alignment)
11528           Out << 'a' << ParamAttr.Alignment;
11529       }
11530       Out << '_' << Fn->getName();
11531       Fn->addFnAttr(Out.str());
11532     }
11533   }
11534 }
11535 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

11542 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11543 ///
11544 /// TODO: Need to implement the behavior for reference marked with a
11545 /// var or no linear modifiers (1.b in the section). For this, we
11546 /// need to extend ParamKindTy to support the linear modifiers.
getAArch64MTV(QualType QT,ParamKindTy Kind)11547 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11548   QT = QT.getCanonicalType();
11549 
11550   if (QT->isVoidType())
11551     return false;
11552 
11553   if (Kind == ParamKindTy::Uniform)
11554     return false;
11555 
11556   if (Kind == ParamKindTy::Linear)
11557     return false;
11558 
11559   // TODO: Handle linear references with modifiers
11560 
11561   if (Kind == ParamKindTy::LinearWithVarStride)
11562     return false;
11563 
11564   return true;
11565 }
11566 
11567 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
getAArch64PBV(QualType QT,ASTContext & C)11568 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11569   QT = QT.getCanonicalType();
11570   unsigned Size = C.getTypeSize(QT);
11571 
11572   // Only scalars and complex within 16 bytes wide set PVB to true.
11573   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11574     return false;
11575 
11576   if (QT->isFloatingType())
11577     return true;
11578 
11579   if (QT->isIntegerType())
11580     return true;
11581 
11582   if (QT->isPointerType())
11583     return true;
11584 
11585   // TODO: Add support for complex types (section 3.1.2, item 2).
11586 
11587   return false;
11588 }
11589 
11590 /// Computes the lane size (LS) of a return type or of an input parameter,
11591 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11592 /// TODO: Add support for references, section 3.2.1, item 1.
getAArch64LS(QualType QT,ParamKindTy Kind,ASTContext & C)11593 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11594   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11595     QualType PTy = QT.getCanonicalType()->getPointeeType();
11596     if (getAArch64PBV(PTy, C))
11597       return C.getTypeSize(PTy);
11598   }
11599   if (getAArch64PBV(QT, C))
11600     return C.getTypeSize(QT);
11601 
11602   return C.getTypeSize(C.getUIntPtrType());
11603 }
11604 
11605 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11606 // signature of the scalar function, as defined in 3.2.2 of the
11607 // AAVFABI.
11608 static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)11609 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11610   QualType RetType = FD->getReturnType().getCanonicalType();
11611 
11612   ASTContext &C = FD->getASTContext();
11613 
11614   bool OutputBecomesInput = false;
11615 
11616   llvm::SmallVector<unsigned, 8> Sizes;
11617   if (!RetType->isVoidType()) {
11618     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11619     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11620       OutputBecomesInput = true;
11621   }
11622   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11623     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11624     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11625   }
11626 
11627   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11628   // The LS of a function parameter / return value can only be a power
11629   // of 2, starting from 8 bits, up to 128.
11630   assert(std::all_of(Sizes.begin(), Sizes.end(),
11631                      [](unsigned Size) {
11632                        return Size == 8 || Size == 16 || Size == 32 ||
11633                               Size == 64 || Size == 128;
11634                      }) &&
11635          "Invalid size");
11636 
11637   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11638                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11639                          OutputBecomesInput);
11640 }
11641 
11642 /// Mangle the parameter part of the vector function name according to
11643 /// their OpenMP classification. The mangling function is defined in
11644 /// section 3.5 of the AAVFABI.
mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs)11645 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11646   SmallString<256> Buffer;
11647   llvm::raw_svector_ostream Out(Buffer);
11648   for (const auto &ParamAttr : ParamAttrs) {
11649     switch (ParamAttr.Kind) {
11650     case LinearWithVarStride:
11651       Out << "ls" << ParamAttr.StrideOrArg;
11652       break;
11653     case Linear:
11654       Out << 'l';
11655       // Don't print the step value if it is not present or if it is
11656       // equal to 1.
11657       if (ParamAttr.StrideOrArg != 1)
11658         Out << ParamAttr.StrideOrArg;
11659       break;
11660     case Uniform:
11661       Out << 'u';
11662       break;
11663     case Vector:
11664       Out << 'v';
11665       break;
11666     }
11667 
11668     if (!!ParamAttr.Alignment)
11669       Out << 'a' << ParamAttr.Alignment;
11670   }
11671 
11672   return std::string(Out.str());
11673 }
11674 
11675 // Function used to add the attribute. The parameter `VLEN` is
11676 // templated to allow the use of "x" when targeting scalable functions
11677 // for SVE.
11678 template <typename T>
addAArch64VectorName(T VLEN,StringRef LMask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)11679 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11680                                  char ISA, StringRef ParSeq,
11681                                  StringRef MangledName, bool OutputBecomesInput,
11682                                  llvm::Function *Fn) {
11683   SmallString<256> Buffer;
11684   llvm::raw_svector_ostream Out(Buffer);
11685   Out << Prefix << ISA << LMask << VLEN;
11686   if (OutputBecomesInput)
11687     Out << "v";
11688   Out << ParSeq << "_" << MangledName;
11689   Fn->addFnAttr(Out.str());
11690 }
11691 
11692 // Helper function to generate the Advanced SIMD names depending on
11693 // the value of the NDS when simdlen is not present.
addAArch64AdvSIMDNDSNames(unsigned NDS,StringRef Mask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)11694 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11695                                       StringRef Prefix, char ISA,
11696                                       StringRef ParSeq, StringRef MangledName,
11697                                       bool OutputBecomesInput,
11698                                       llvm::Function *Fn) {
11699   switch (NDS) {
11700   case 8:
11701     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11702                          OutputBecomesInput, Fn);
11703     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11704                          OutputBecomesInput, Fn);
11705     break;
11706   case 16:
11707     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11708                          OutputBecomesInput, Fn);
11709     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11710                          OutputBecomesInput, Fn);
11711     break;
11712   case 32:
11713     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11714                          OutputBecomesInput, Fn);
11715     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11716                          OutputBecomesInput, Fn);
11717     break;
11718   case 64:
11719   case 128:
11720     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11721                          OutputBecomesInput, Fn);
11722     break;
11723   default:
11724     llvm_unreachable("Scalar type is too wide.");
11725   }
11726 }
11727 
11728 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);  // Narrowest data size.
  const unsigned WDS = std::get<1>(Data);  // Widest data size.
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  //    warn and emit nothing.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output; otherwise warn and emit nothing.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total bit width (VLEN * WDS) between 128 and 2048 in steps
  // of 128; otherwise warn and emit nothing.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence: the per-parameter portion of the name is
  // identical for every variant, so compute it once.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vectors use "x" as VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`; the VLENs themselves are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11836 
emitDeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn)11837 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11838                                               llvm::Function *Fn) {
11839   ASTContext &C = CGM.getContext();
11840   FD = FD->getMostRecentDecl();
11841   // Map params to their positions in function decl.
11842   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11843   if (isa<CXXMethodDecl>(FD))
11844     ParamPositions.try_emplace(FD, 0);
11845   unsigned ParamPos = ParamPositions.size();
11846   for (const ParmVarDecl *P : FD->parameters()) {
11847     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11848     ++ParamPos;
11849   }
11850   while (FD) {
11851     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11852       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11853       // Mark uniform parameters.
11854       for (const Expr *E : Attr->uniforms()) {
11855         E = E->IgnoreParenImpCasts();
11856         unsigned Pos;
11857         if (isa<CXXThisExpr>(E)) {
11858           Pos = ParamPositions[FD];
11859         } else {
11860           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11861                                 ->getCanonicalDecl();
11862           Pos = ParamPositions[PVD];
11863         }
11864         ParamAttrs[Pos].Kind = Uniform;
11865       }
11866       // Get alignment info.
11867       auto NI = Attr->alignments_begin();
11868       for (const Expr *E : Attr->aligneds()) {
11869         E = E->IgnoreParenImpCasts();
11870         unsigned Pos;
11871         QualType ParmTy;
11872         if (isa<CXXThisExpr>(E)) {
11873           Pos = ParamPositions[FD];
11874           ParmTy = E->getType();
11875         } else {
11876           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11877                                 ->getCanonicalDecl();
11878           Pos = ParamPositions[PVD];
11879           ParmTy = PVD->getType();
11880         }
11881         ParamAttrs[Pos].Alignment =
11882             (*NI)
11883                 ? (*NI)->EvaluateKnownConstInt(C)
11884                 : llvm::APSInt::getUnsigned(
11885                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11886                           .getQuantity());
11887         ++NI;
11888       }
11889       // Mark linear parameters.
11890       auto SI = Attr->steps_begin();
11891       auto MI = Attr->modifiers_begin();
11892       for (const Expr *E : Attr->linears()) {
11893         E = E->IgnoreParenImpCasts();
11894         unsigned Pos;
11895         // Rescaling factor needed to compute the linear parameter
11896         // value in the mangled name.
11897         unsigned PtrRescalingFactor = 1;
11898         if (isa<CXXThisExpr>(E)) {
11899           Pos = ParamPositions[FD];
11900         } else {
11901           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11902                                 ->getCanonicalDecl();
11903           Pos = ParamPositions[PVD];
11904           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11905             PtrRescalingFactor = CGM.getContext()
11906                                      .getTypeSizeInChars(P->getPointeeType())
11907                                      .getQuantity();
11908         }
11909         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11910         ParamAttr.Kind = Linear;
11911         // Assuming a stride of 1, for `linear` without modifiers.
11912         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11913         if (*SI) {
11914           Expr::EvalResult Result;
11915           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11916             if (const auto *DRE =
11917                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11918               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11919                 ParamAttr.Kind = LinearWithVarStride;
11920                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11921                     ParamPositions[StridePVD->getCanonicalDecl()]);
11922               }
11923             }
11924           } else {
11925             ParamAttr.StrideOrArg = Result.Val.getInt();
11926           }
11927         }
11928         // If we are using a linear clause on a pointer, we need to
11929         // rescale the value of linear_step with the byte size of the
11930         // pointee type.
11931         if (Linear == ParamAttr.Kind)
11932           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11933         ++SI;
11934         ++MI;
11935       }
11936       llvm::APSInt VLENVal;
11937       SourceLocation ExprLoc;
11938       const Expr *VLENExpr = Attr->getSimdlen();
11939       if (VLENExpr) {
11940         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11941         ExprLoc = VLENExpr->getExprLoc();
11942       }
11943       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11944       if (CGM.getTriple().isX86()) {
11945         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11946       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11947         unsigned VLEN = VLENVal.getExtValue();
11948         StringRef MangledName = Fn->getName();
11949         if (CGM.getTarget().hasFeature("sve"))
11950           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11951                                          MangledName, 's', 128, Fn, ExprLoc);
11952         if (CGM.getTarget().hasFeature("neon"))
11953           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11954                                          MangledName, 'n', 128, Fn, ExprLoc);
11955       }
11956     }
11957     FD = FD->getPreviousDecl();
11958   }
11959 }
11960 
11961 namespace {
11962 /// Cleanup action for doacross support.
11963 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11964 public:
11965   static const int DoacrossFinArgs = 2;
11966 
11967 private:
11968   llvm::FunctionCallee RTLFn;
11969   llvm::Value *Args[DoacrossFinArgs];
11970 
11971 public:
DoacrossCleanupTy(llvm::FunctionCallee RTLFn,ArrayRef<llvm::Value * > CallArgs)11972   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11973                     ArrayRef<llvm::Value *> CallArgs)
11974       : RTLFn(RTLFn) {
11975     assert(CallArgs.size() == DoacrossFinArgs);
11976     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11977   }
Emit(CodeGenFunction & CGF,Flags)11978   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11979     if (!CGF.HaveInsertPoint())
11980       return;
11981     CGF.EmitRuntimeCall(RTLFn, Args);
11982   }
11983 };
11984 } // namespace
11985 
// Emits the __kmpc_doacross_init call for a doacross loop nest and pushes a
// cleanup that calls __kmpc_doacross_fini on scope exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // KmpDimTy is built lazily on first use and cached on the runtime object.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data; the lower bound stays 0 from the null
  // initialization above.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Widen the iteration count to kmp_int64.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini at the end of the region
  // (normal and EH exits), with location/thread id captured now.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12056 
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)12057 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12058                                           const OMPDependClause *C) {
12059   QualType Int64Ty =
12060       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12061   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12062   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12063       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12064   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12065   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12066     const Expr *CounterVal = C->getLoopData(I);
12067     assert(CounterVal);
12068     llvm::Value *CntVal = CGF.EmitScalarConversion(
12069         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12070         CounterVal->getExprLoc());
12071     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12072                           /*Volatile=*/false, Int64Ty);
12073   }
12074   llvm::Value *Args[] = {
12075       emitUpdateLocation(CGF, C->getBeginLoc()),
12076       getThreadID(CGF, C->getBeginLoc()),
12077       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12078   llvm::FunctionCallee RTLFn;
12079   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12080     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12081                                                   OMPRTL___kmpc_doacross_post);
12082   } else {
12083     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12084     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12085                                                   OMPRTL___kmpc_doacross_wait);
12086   }
12087   CGF.EmitRuntimeCall(RTLFn, Args);
12088 }
12089 
emitCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::FunctionCallee Callee,ArrayRef<llvm::Value * > Args) const12090 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12091                                llvm::FunctionCallee Callee,
12092                                ArrayRef<llvm::Value *> Args) const {
12093   assert(Loc.isValid() && "Outlined function call location must be valid.");
12094   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12095 
12096   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12097     if (Fn->doesNotThrow()) {
12098       CGF.EmitNounwindRuntimeCall(Fn, Args);
12099       return;
12100     }
12101   }
12102   CGF.EmitRuntimeCall(Callee, Args);
12103 }
12104 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args. Thin wrapper over emitCall, which attaches an artificial debug
/// location and selects a nounwind call when the callee cannot throw.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12110 
emitFunctionProlog(CodeGenFunction & CGF,const Decl * D)12111 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12112   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12113     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12114       HasEmittedDeclareTargetRegion = true;
12115 }
12116 
/// Default mapping from a target-side parameter to its storage: the native
/// parameter's local address is used directly; \p TargetParam is ignored in
/// this base implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12122 
/// Returns the address of local variable \p VD, handling two special cases:
/// (1) variables stored on an untied-task local-vars stack for the current
/// function, and (2) variables marked with the OMPAllocateDeclAttr, which are
/// allocated via the __kmpc_alloc runtime call and freed via __kmpc_free
/// through an EH-stack cleanup. Returns Address::invalid() when neither case
/// applies (so the caller falls back to default local storage).
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // If the current function is an untied task body, the variable may have a
  // pair of addresses recorded: the visible one and the real storage.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like types: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Fixed-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(tid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Emits __kmpc_free(tid, addr, allocator) when the variable's scope ends
    // (on both normal and EH paths).
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the real untied-task storage when it exists.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12226 
isLocalVarInUntiedTask(CodeGenFunction & CGF,const VarDecl * VD) const12227 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12228                                              const VarDecl *VD) const {
12229   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12230   if (It == FunctionToUntiedTaskStackMap.end())
12231     return false;
12232   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12233 }
12234 
NontemporalDeclsRAII(CodeGenModule & CGM,const OMPLoopDirective & S)12235 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12236     CodeGenModule &CGM, const OMPLoopDirective &S)
12237     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12238   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12239   if (!NeedToPush)
12240     return;
12241   NontemporalDeclsSet &DS =
12242       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12243   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12244     for (const Stmt *Ref : C->private_refs()) {
12245       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12246       const ValueDecl *VD;
12247       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12248         VD = DRE->getDecl();
12249       } else {
12250         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12251         assert((ME->isImplicitCXXThis() ||
12252                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12253                "Expected member of current class.");
12254         VD = ME->getMemberDecl();
12255       }
12256       DS.insert(VD);
12257     }
12258   }
12259 }
12260 
~NontemporalDeclsRAII()12261 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12262   if (!NeedToPush)
12263     return;
12264   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12265 }
12266 
UntiedTaskLocalDeclsRAII(CodeGenFunction & CGF,const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,std::pair<Address,Address>> & LocalVars)12267 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12268     CodeGenFunction &CGF,
12269     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12270                           std::pair<Address, Address>> &LocalVars)
12271     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12272   if (!NeedToPush)
12273     return;
12274   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12275       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12276   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12277 }
12278 
~UntiedTaskLocalDeclsRAII()12279 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12280   if (!NeedToPush)
12281     return;
12282   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12283 }
12284 
isNontemporalDecl(const ValueDecl * VD) const12285 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12286   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12287 
12288   return llvm::any_of(
12289       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12290       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12291 }
12292 
tryToDisableInnerAnalysis(const OMPExecutableDirective & S,llvm::DenseSet<CanonicalDeclPtr<const Decl>> & NeedToAddForLPCsAsDisabled) const12293 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12294     const OMPExecutableDirective &S,
12295     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12296     const {
12297   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12298   // Vars in target/task regions must be excluded completely.
12299   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12300       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12301     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12302     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12303     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12304     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12305       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12306         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12307     }
12308   }
12309   // Exclude vars in private clauses.
12310   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12311     for (const Expr *Ref : C->varlists()) {
12312       if (!Ref->getType()->isScalarType())
12313         continue;
12314       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12315       if (!DRE)
12316         continue;
12317       NeedToCheckForLPCs.insert(DRE->getDecl());
12318     }
12319   }
12320   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12321     for (const Expr *Ref : C->varlists()) {
12322       if (!Ref->getType()->isScalarType())
12323         continue;
12324       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12325       if (!DRE)
12326         continue;
12327       NeedToCheckForLPCs.insert(DRE->getDecl());
12328     }
12329   }
12330   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12331     for (const Expr *Ref : C->varlists()) {
12332       if (!Ref->getType()->isScalarType())
12333         continue;
12334       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12335       if (!DRE)
12336         continue;
12337       NeedToCheckForLPCs.insert(DRE->getDecl());
12338     }
12339   }
12340   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12341     for (const Expr *Ref : C->varlists()) {
12342       if (!Ref->getType()->isScalarType())
12343         continue;
12344       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12345       if (!DRE)
12346         continue;
12347       NeedToCheckForLPCs.insert(DRE->getDecl());
12348     }
12349   }
12350   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12351     for (const Expr *Ref : C->varlists()) {
12352       if (!Ref->getType()->isScalarType())
12353         continue;
12354       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12355       if (!DRE)
12356         continue;
12357       NeedToCheckForLPCs.insert(DRE->getDecl());
12358     }
12359   }
12360   for (const Decl *VD : NeedToCheckForLPCs) {
12361     for (const LastprivateConditionalData &Data :
12362          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12363       if (Data.DeclToUniqueName.count(VD) > 0) {
12364         if (!Data.Disabled)
12365           NeedToAddForLPCsAsDisabled.insert(VD);
12366         break;
12367       }
12368     }
12369   }
12370 }
12371 
/// Starts lastprivate-conditional tracking for directive \p S when OpenMP >=
/// 5.0 and the directive has a 'lastprivate(conditional: ...)' clause. On
/// push, records a unique global name for each listed declaration, the loop
/// iteration variable lvalue \p IVLVal and the current function; the
/// destructor pops the entry.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only if OpenMP >= 5.0 and at least one lastprivate clause is of
      // the 'conditional' kind.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      // Each conditional lastprivate decl gets a unique name used for the
      // global "last value" storage (see emitLastprivateConditionalUpdate).
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12403 
LastprivateConditionalRAII(CodeGenFunction & CGF,const OMPExecutableDirective & S)12404 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12405     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12406     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12407   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12408   if (CGM.getLangOpts().OpenMP < 50)
12409     return;
12410   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12411   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12412   if (!NeedToAddForLPCsAsDisabled.empty()) {
12413     Action = ActionToDo::DisableLastprivateConditional;
12414     LastprivateConditionalData &Data =
12415         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12416     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12417       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12418     Data.Fn = CGF.CurFn;
12419     Data.Disabled = true;
12420   }
12421 }
12422 
/// Creates a RAII object that disables lastprivate-conditional analysis
/// inside the region of directive \p S (delegates to the private
/// disabling constructor).
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12428 
~LastprivateConditionalRAII()12429 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12430   if (CGM.getLangOpts().OpenMP < 50)
12431     return;
12432   if (Action == ActionToDo::DisableLastprivateConditional) {
12433     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12434            "Expected list of disabled private vars.");
12435     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12436   }
12437   if (Action == ActionToDo::PushAsLastprivateConditional) {
12438     assert(
12439         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12440         "Expected list of lastprivate conditional vars.");
12441     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12442   }
12443 }
12444 
/// Creates (or reuses) the private storage for lastprivate conditional
/// variable \p VD in the current function: an implicit record
/// { value-of-VD, char Fired } cached in LastprivateConditionalToTypes.
/// Resets the Fired flag to 0 and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the { value, Fired } record and a
    // stack temporary of that type, then cache them.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already registered: unpack the cached tuple.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12479 
12480 namespace {
12481 /// Checks if the lastprivate conditional variable is referenced in LHS.
12482 class LastprivateConditionalRefChecker final
12483     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12484   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12485   const Expr *FoundE = nullptr;
12486   const Decl *FoundD = nullptr;
12487   StringRef UniqueDeclName;
12488   LValue IVLVal;
12489   llvm::Function *FoundFn = nullptr;
12490   SourceLocation Loc;
12491 
12492 public:
VisitDeclRefExpr(const DeclRefExpr * E)12493   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12494     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12495          llvm::reverse(LPM)) {
12496       auto It = D.DeclToUniqueName.find(E->getDecl());
12497       if (It == D.DeclToUniqueName.end())
12498         continue;
12499       if (D.Disabled)
12500         return false;
12501       FoundE = E;
12502       FoundD = E->getDecl()->getCanonicalDecl();
12503       UniqueDeclName = It->second;
12504       IVLVal = D.IVLVal;
12505       FoundFn = D.Fn;
12506       break;
12507     }
12508     return FoundE == E;
12509   }
VisitMemberExpr(const MemberExpr * E)12510   bool VisitMemberExpr(const MemberExpr *E) {
12511     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12512       return false;
12513     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12514          llvm::reverse(LPM)) {
12515       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12516       if (It == D.DeclToUniqueName.end())
12517         continue;
12518       if (D.Disabled)
12519         return false;
12520       FoundE = E;
12521       FoundD = E->getMemberDecl()->getCanonicalDecl();
12522       UniqueDeclName = It->second;
12523       IVLVal = D.IVLVal;
12524       FoundFn = D.Fn;
12525       break;
12526     }
12527     return FoundE == E;
12528   }
VisitStmt(const Stmt * S)12529   bool VisitStmt(const Stmt *S) {
12530     for (const Stmt *Child : S->children()) {
12531       if (!Child)
12532         continue;
12533       if (const auto *E = dyn_cast<Expr>(Child))
12534         if (!E->isGLValue())
12535           continue;
12536       if (Visit(Child))
12537         return true;
12538     }
12539     return false;
12540   }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)12541   explicit LastprivateConditionalRefChecker(
12542       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12543       : LPM(LPM) {}
12544   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const12545   getFoundData() const {
12546     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12547   }
12548 };
12549 } // namespace
12550 
/// Emits the conditional lastprivate update for variable \p LVal tracked
/// under \p UniqueDeclName: inside a critical region (named by the unique
/// name), compares the global "last iteration" counter against the current
/// iteration \p IVLVal and, if not newer, stores both the iteration number
/// and the private value into internal global variables. Under -fopenmp-simd
/// the body is emitted without the critical region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12637 
/// Checks whether \p LHS references a lastprivate conditional variable and,
/// if so, emits the required update. When the variable belongs to the current
/// function, the "last value" globals are updated directly; when it belongs
/// to an outer function (inner parallel region), only the Fired flag of the
/// outer private struct is set atomically.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: other threads of the outer region may read the flag.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12680 
checkAndEmitSharedLastprivateConditional(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> & IgnoredDecls)12681 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12682     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12683     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12684   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12685     return;
12686   auto Range = llvm::reverse(LastprivateConditionalStack);
12687   auto It = llvm::find_if(
12688       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12689   if (It == Range.end() || It->Fn != CGF.CurFn)
12690     return;
12691   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12692   assert(LPCI != LastprivateConditionalToTypes.end() &&
12693          "Lastprivates must be registered already.");
12694   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12695   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12696   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12697   for (const auto &Pair : It->DeclToUniqueName) {
12698     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12699     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12700       continue;
12701     auto I = LPCI->getSecond().find(Pair.first);
12702     assert(I != LPCI->getSecond().end() &&
12703            "Lastprivate must be rehistered already.");
12704     // bool Cmp = priv_a.Fired != 0;
12705     LValue BaseLVal = std::get<3>(I->getSecond());
12706     LValue FiredLVal =
12707         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12708     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12709     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12710     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12711     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12712     // if (Cmp) {
12713     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12714     CGF.EmitBlock(ThenBB);
12715     Address Addr = CGF.GetAddrOfLocalVar(VD);
12716     LValue LVal;
12717     if (VD->getType()->isReferenceType())
12718       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12719                                            AlignmentSource::Decl);
12720     else
12721       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12722                                 AlignmentSource::Decl);
12723     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12724                                      D.getBeginLoc());
12725     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12726     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12727     // }
12728   }
12729 }
12730 
emitLastprivateConditionalFinalUpdate(CodeGenFunction & CGF,LValue PrivLVal,const VarDecl * VD,SourceLocation Loc)12731 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12732     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12733     SourceLocation Loc) {
12734   if (CGF.getLangOpts().OpenMP < 50)
12735     return;
12736   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12737   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12738          "Unknown lastprivate conditional variable.");
12739   StringRef UniqueName = It->second;
12740   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12741   // The variable was not updated in the region - exit.
12742   if (!GV)
12743     return;
12744   LValue LPLVal = CGF.MakeAddrLValue(
12745       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12746   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12747   CGF.EmitStoreOfScalar(Res, PrivLVal);
12748 }
12749 
// Outlining for 'parallel' regions is unavailable in the SIMD-only runtime;
// reaching this override indicates a front-end bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12755 
// Outlining for 'teams' regions is unavailable in the SIMD-only runtime.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12761 
// Outlining for task bodies is unavailable in the SIMD-only runtime.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12769 
// No runtime call exists for launching a 'parallel' region in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12777 
// 'critical' regions are not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12784 
// 'master' regions are not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12790 
// 'masked' regions are not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12797 
// 'taskyield' is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12802 
// 'taskgroup' regions are not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12808 
// 'single' regions (including copyprivate handling) are not supported by the
// SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12816 
// 'ordered' regions are not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12823 
// Barriers are not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12831 
// Dynamic/dispatch loop scheduling is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12838 
// Static loop scheduling initialization is not supported by the SIMD-only
// runtime.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12844 
// 'distribute' static scheduling is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12850 
// Ordered-iteration bookkeeping is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12857 
// Static loop scheduling finalization is not supported by the SIMD-only
// runtime.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12863 
// Fetching the next dispatch chunk is not supported by the SIMD-only runtime.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12871 
// The 'num_threads' clause is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12877 
// The 'proc_bind' clause is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12883 
// Threadprivate variables are not supported by the SIMD-only runtime.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12890 
// Threadprivate variable definitions are not supported by the SIMD-only
// runtime.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12896 
// Artificial threadprivate storage is not supported by the SIMD-only runtime.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12901 
// 'flush' is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12908 
// Task creation/execution is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12917 
// 'taskloop' is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12924 
// In SIMD-only mode only "simple" reductions (no runtime calls needed) can
// occur; delegate their emission to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12933 
// Task reduction initialization is not supported by the SIMD-only runtime.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12939 
// Task reduction finalization is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12945 
// Task reduction fixups are not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12952 
// Task reduction item lookup is not supported by the SIMD-only runtime.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12959 
// 'taskwait' is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12964 
// 'cancellation point' is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12970 
// 'cancel' is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12976 
// Target region outlining is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12983 
// Launching a 'target' region is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12993 
// Registering functions for target/offload emission is not supported by the
// SIMD-only runtime.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12997 
// Target-specific global variable emission is not supported by the SIMD-only
// runtime.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13001 
// SIMD-only mode never claims a global for target emission; returning false
// reports the global as not handled here.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13005 
// Launching a 'teams' region is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13013 
// 'num_teams'/'thread_limit' clauses are not supported by the SIMD-only
// runtime.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13020 
// 'target data' mapping is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13026 
// Stand-alone target data directives (enter/exit data, update) are not
// supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13032 
// Doacross loop initialization is not supported by the SIMD-only runtime.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13038 
// Doacross 'ordered depend' handling is not supported by the SIMD-only
// runtime.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13043 
// Parameter translation for target regions is not supported by the SIMD-only
// runtime.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13049 
// Target-parameter address lookup is not supported by the SIMD-only runtime.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13056