1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54   /// Kinds of OpenMP regions used in codegen.
55   enum CGOpenMPRegionKind {
56     /// Region with outlined function for standalone 'parallel'
57     /// directive.
58     ParallelOutlinedRegion,
59     /// Region with outlined function for standalone 'task' directive.
60     TaskOutlinedRegion,
61     /// Region for constructs that do not require function outlining,
62     /// like 'for', 'sections', 'atomic' etc. directives.
63     InlinedRegion,
64     /// Region with outlined function for standalone 'target' directive.
65     TargetRegion,
66   };
67 
68   CGOpenMPRegionInfo(const CapturedStmt &CS,
69                      const CGOpenMPRegionKind RegionKind,
70                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71                      bool HasCancel)
72       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
75   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77                      bool HasCancel)
78       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79         Kind(Kind), HasCancel(HasCancel) {}
80 
81   /// Get a variable or parameter for storing global thread id
82   /// inside OpenMP construct.
83   virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85   /// Emit the captured statement body.
86   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88   /// Get an LValue for the current ThreadID variable.
89   /// \return LValue for thread id variable. This LValue always has type int32*.
90   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
92   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
94   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
96   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
98   bool hasCancel() const { return HasCancel; }
99 
100   static bool classof(const CGCapturedStmtInfo *Info) {
101     return Info->getKind() == CR_OpenMP;
102   }
103 
104   ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107   CGOpenMPRegionKind RegionKind;
108   RegionCodeGenTy CodeGen;
109   OpenMPDirectiveKind Kind;
110   bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
116   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117                              const RegionCodeGenTy &CodeGen,
118                              OpenMPDirectiveKind Kind, bool HasCancel,
119                              StringRef HelperName)
120       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121                            HasCancel),
122         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124   }
125 
126   /// Get a variable or parameter for storing global thread id
127   /// inside OpenMP construct.
128   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130   /// Get the name of the capture helper.
131   StringRef getHelperName() const override { return HelperName; }
132 
133   static bool classof(const CGCapturedStmtInfo *Info) {
134     return CGOpenMPRegionInfo::classof(Info) &&
135            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136                ParallelOutlinedRegion;
137   }
138 
139 private:
140   /// A variable or parameter storing global thread id for OpenMP
141   /// constructs.
142   const VarDecl *ThreadIDVar;
143   StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149   class UntiedTaskActionTy final : public PrePostActionTy {
150     bool Untied;
151     const VarDecl *PartIDVar;
152     const RegionCodeGenTy UntiedCodeGen;
153     llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155   public:
156     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157                        const RegionCodeGenTy &UntiedCodeGen)
158         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159     void Enter(CodeGenFunction &CGF) override {
160       if (Untied) {
161         // Emit task switching point.
162         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163             CGF.GetAddrOfLocalVar(PartIDVar),
164             PartIDVar->getType()->castAs<PointerType>());
165         llvm::Value *Res =
166             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169         CGF.EmitBlock(DoneBB);
170         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173                               CGF.Builder.GetInsertBlock());
174         emitUntiedSwitch(CGF);
175       }
176     }
177     void emitUntiedSwitch(CodeGenFunction &CGF) const {
178       if (Untied) {
179         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180             CGF.GetAddrOfLocalVar(PartIDVar),
181             PartIDVar->getType()->castAs<PointerType>());
182         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               PartIdLVal);
184         UntiedCodeGen(CGF);
185         CodeGenFunction::JumpDest CurPoint =
186             CGF.getJumpDestInCurrentScope(".untied.next.");
187         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190                               CGF.Builder.GetInsertBlock());
191         CGF.EmitBranchThroughCleanup(CurPoint);
192         CGF.EmitBlock(CurPoint.getBlock());
193       }
194     }
195     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196   };
197   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198                                  const VarDecl *ThreadIDVar,
199                                  const RegionCodeGenTy &CodeGen,
200                                  OpenMPDirectiveKind Kind, bool HasCancel,
201                                  const UntiedTaskActionTy &Action)
202       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203         ThreadIDVar(ThreadIDVar), Action(Action) {
204     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205   }
206 
207   /// Get a variable or parameter for storing global thread id
208   /// inside OpenMP construct.
209   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211   /// Get an LValue for the current ThreadID variable.
212   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214   /// Get the name of the capture helper.
215   StringRef getHelperName() const override { return ".omp_outlined."; }
216 
217   void emitUntiedSwitch(CodeGenFunction &CGF) override {
218     Action.emitUntiedSwitch(CGF);
219   }
220 
221   static bool classof(const CGCapturedStmtInfo *Info) {
222     return CGOpenMPRegionInfo::classof(Info) &&
223            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224                TaskOutlinedRegion;
225   }
226 
227 private:
228   /// A variable or parameter storing global thread id for OpenMP
229   /// constructs.
230   const VarDecl *ThreadIDVar;
231   /// Action for emitting code for untied tasks.
232   const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240                             const RegionCodeGenTy &CodeGen,
241                             OpenMPDirectiveKind Kind, bool HasCancel)
242       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243         OldCSI(OldCSI),
244         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246   // Retrieve the value of the context parameter.
247   llvm::Value *getContextValue() const override {
248     if (OuterRegionInfo)
249       return OuterRegionInfo->getContextValue();
250     llvm_unreachable("No context value for inlined OpenMP region");
251   }
252 
253   void setContextValue(llvm::Value *V) override {
254     if (OuterRegionInfo) {
255       OuterRegionInfo->setContextValue(V);
256       return;
257     }
258     llvm_unreachable("No context value for inlined OpenMP region");
259   }
260 
261   /// Lookup the captured field decl for a variable.
262   const FieldDecl *lookup(const VarDecl *VD) const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->lookup(VD);
265     // If there is no outer outlined region,no need to lookup in a list of
266     // captured variables, we can use the original one.
267     return nullptr;
268   }
269 
270   FieldDecl *getThisFieldDecl() const override {
271     if (OuterRegionInfo)
272       return OuterRegionInfo->getThisFieldDecl();
273     return nullptr;
274   }
275 
276   /// Get a variable or parameter for storing global thread id
277   /// inside OpenMP construct.
278   const VarDecl *getThreadIDVariable() const override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariable();
281     return nullptr;
282   }
283 
284   /// Get an LValue for the current ThreadID variable.
285   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286     if (OuterRegionInfo)
287       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288     llvm_unreachable("No LValue for inlined OpenMP construct");
289   }
290 
291   /// Get the name of the capture helper.
292   StringRef getHelperName() const override {
293     if (auto *OuterRegionInfo = getOldCSI())
294       return OuterRegionInfo->getHelperName();
295     llvm_unreachable("No helper name for inlined OpenMP construct");
296   }
297 
298   void emitUntiedSwitch(CodeGenFunction &CGF) override {
299     if (OuterRegionInfo)
300       OuterRegionInfo->emitUntiedSwitch(CGF);
301   }
302 
303   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
305   static bool classof(const CGCapturedStmtInfo *Info) {
306     return CGOpenMPRegionInfo::classof(Info) &&
307            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308   }
309 
310   ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313   /// CodeGen info about outer OpenMP region.
314   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315   CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
327       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328                            /*HasCancel=*/false),
329         HelperName(HelperName) {}
330 
331   /// This is unused for target regions because each starts executing
332   /// with a single thread.
333   const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335   /// Get the name of the capture helper.
336   StringRef getHelperName() const override { return HelperName; }
337 
338   static bool classof(const CGCapturedStmtInfo *Info) {
339     return CGOpenMPRegionInfo::classof(Info) &&
340            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341   }
342 
343 private:
344   StringRef HelperName;
345 };
346 
347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348   llvm_unreachable("No codegen for expressions");
349 }
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356                                   OMPD_unknown,
357                                   /*HasCancel=*/false),
358         PrivScope(CGF) {
359     // Make sure the globals captured in the provided statement are local by
360     // using the privatization logic. We assume the same variable is not
361     // captured more than once.
362     for (const auto &C : CS.captures()) {
363       if (!C.capturesVariable() && !C.capturesVariableByCopy())
364         continue;
365 
366       const VarDecl *VD = C.getCapturedVar();
367       if (VD->isLocalVarDeclOrParm())
368         continue;
369 
370       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371                       /*RefersToEnclosingVariableOrCapture=*/false,
372                       VD->getType().getNonReferenceType(), VK_LValue,
373                       C.getLocation());
374       PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375     }
376     (void)PrivScope.Privatize();
377   }
378 
379   /// Lookup the captured field decl for a variable.
380   const FieldDecl *lookup(const VarDecl *VD) const override {
381     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382       return FD;
383     return nullptr;
384   }
385 
386   /// Emit the captured statement body.
387   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388     llvm_unreachable("No body for expressions");
389   }
390 
391   /// Get a variable or parameter for storing global thread id
392   /// inside OpenMP construct.
393   const VarDecl *getThreadIDVariable() const override {
394     llvm_unreachable("No thread id for expressions");
395   }
396 
397   /// Get the name of the capture helper.
398   StringRef getHelperName() const override {
399     llvm_unreachable("No helper name for expressions");
400   }
401 
402   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405   /// Private scope to capture global variables.
406   CodeGenFunction::OMPPrivateScope PrivScope;
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411   CodeGenFunction &CGF;
412   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413   FieldDecl *LambdaThisCaptureField = nullptr;
414   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415   bool NoInheritance = false;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel,
424                           bool NoInheritance = true)
425       : CGF(CGF), NoInheritance(NoInheritance) {
426     // Start emission for the construct.
427     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429     if (NoInheritance) {
430       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432       CGF.LambdaThisCaptureField = nullptr;
433       BlockInfo = CGF.BlockInfo;
434       CGF.BlockInfo = nullptr;
435     }
436   }
437 
438   ~InlinedOpenMPRegionRAII() {
439     // Restore original CapturedStmtInfo only if we're done with code emission.
440     auto *OldCSI =
441         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442     delete CGF.CapturedStmtInfo;
443     CGF.CapturedStmtInfo = OldCSI;
444     if (NoInheritance) {
445       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447       CGF.BlockInfo = BlockInfo;
448     }
449   }
450 };
451 
452 /// Values for bit flags used in the ident_t to describe the fields.
453 /// All enumeric elements are named and described in accordance with the code
454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455 enum OpenMPLocationFlags : unsigned {
456   /// Use trampoline for internal microtask.
457   OMP_IDENT_IMD = 0x01,
458   /// Use c-style ident structure.
459   OMP_IDENT_KMPC = 0x02,
460   /// Atomic reduction option for kmpc_reduce.
461   OMP_ATOMIC_REDUCE = 0x10,
462   /// Explicit 'barrier' directive.
463   OMP_IDENT_BARRIER_EXPL = 0x20,
464   /// Implicit barrier in code.
465   OMP_IDENT_BARRIER_IMPL = 0x40,
466   /// Implicit barrier in 'for' directive.
467   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468   /// Implicit barrier in 'sections' directive.
469   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470   /// Implicit barrier in 'single' directive.
471   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472   /// Call of __kmp_for_static_init for static loop.
473   OMP_IDENT_WORK_LOOP = 0x200,
474   /// Call of __kmp_for_static_init for sections.
475   OMP_IDENT_WORK_SECTIONS = 0x400,
476   /// Call of __kmp_for_static_init for distribute.
477   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
479 };
480 
481 namespace {
482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
483 /// Values for bit flags for marking which requires clauses have been used.
484 enum OpenMPOffloadingRequiresDirFlags : int64_t {
485   /// flag undefined.
486   OMP_REQ_UNDEFINED               = 0x000,
487   /// no requires clause present.
488   OMP_REQ_NONE                    = 0x001,
489   /// reverse_offload clause.
490   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
491   /// unified_address clause.
492   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
493   /// unified_shared_memory clause.
494   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
495   /// dynamic_allocators clause.
496   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
497   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
498 };
499 
500 enum OpenMPOffloadingReservedDeviceIDs {
501   /// Device ID if the device was not defined, runtime should get it
502   /// from environment variables in the spec.
503   OMP_DEVICEID_UNDEF = -1,
504 };
505 } // anonymous namespace
506 
507 /// Describes ident structure that describes a source location.
508 /// All descriptions are taken from
509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
510 /// Original structure:
511 /// typedef struct ident {
512 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
513 ///                                  see above  */
514 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
515 ///                                  KMP_IDENT_KMPC identifies this union
516 ///                                  member  */
517 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
518 ///                                  see above */
519 ///#if USE_ITT_BUILD
520 ///                            /*  but currently used for storing
521 ///                                region-specific ITT */
522 ///                            /*  contextual information. */
523 ///#endif /* USE_ITT_BUILD */
524 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
525 ///                                 C++  */
526 ///    char const *psource;    /**< String describing the source location.
527 ///                            The string is composed of semi-colon separated
528 //                             fields which describe the source file,
529 ///                            the function and a pair of line numbers that
530 ///                            delimit the construct.
531 ///                             */
532 /// } ident_t;
533 enum IdentFieldIndex {
534   /// might be used in Fortran
535   IdentField_Reserved_1,
536   /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
537   IdentField_Flags,
538   /// Not really used in Fortran any more
539   IdentField_Reserved_2,
540   /// Source[4] in Fortran, do not use for C++
541   IdentField_Reserved_3,
542   /// String describing the source location. The string is composed of
543   /// semi-colon separated fields which describe the source file, the function
544   /// and a pair of line numbers that delimit the construct.
545   IdentField_PSource
546 };
547 
548 /// Schedule types for 'omp for' loops (these enumerators are taken from
549 /// the enum sched_type in kmp.h).
550 enum OpenMPSchedType {
551   /// Lower bound for default (unordered) versions.
552   OMP_sch_lower = 32,
553   OMP_sch_static_chunked = 33,
554   OMP_sch_static = 34,
555   OMP_sch_dynamic_chunked = 35,
556   OMP_sch_guided_chunked = 36,
557   OMP_sch_runtime = 37,
558   OMP_sch_auto = 38,
559   /// static with chunk adjustment (e.g., simd)
560   OMP_sch_static_balanced_chunked = 45,
561   /// Lower bound for 'ordered' versions.
562   OMP_ord_lower = 64,
563   OMP_ord_static_chunked = 65,
564   OMP_ord_static = 66,
565   OMP_ord_dynamic_chunked = 67,
566   OMP_ord_guided_chunked = 68,
567   OMP_ord_runtime = 69,
568   OMP_ord_auto = 70,
569   OMP_sch_default = OMP_sch_static,
570   /// dist_schedule types
571   OMP_dist_sch_static_chunked = 91,
572   OMP_dist_sch_static = 92,
573   /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
574   /// Set if the monotonic schedule modifier was present.
575   OMP_sch_modifier_monotonic = (1 << 29),
576   /// Set if the nonmonotonic schedule modifier was present.
577   OMP_sch_modifier_nonmonotonic = (1 << 30),
578 };
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621                                              const OMPDeclareReductionDecl *DRD,
622                                              const Expr *InitOp,
623                                              Address Private, Address Original,
624                                              QualType Ty) {
625   if (DRD->getInitializer()) {
626     std::pair<llvm::Function *, llvm::Function *> Reduction =
627         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628     const auto *CE = cast<CallExpr>(InitOp);
629     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632     const auto *LHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634     const auto *RHSDRE =
635         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
674 /// Emit initialization of arrays of complex types.
675 /// \param DestAddr Address of the array.
676 /// \param Type Type of array.
677 /// \param Init Initial expression of array.
678 /// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680                                  QualType Type, bool EmitDeclareReductionInit,
681                                  const Expr *Init,
682                                  const OMPDeclareReductionDecl *DRD,
683                                  Address SrcAddr = Address::invalid()) {
684   // Perform element-by-element initialization.
685   QualType ElementTy;
686 
687   // Drill down to the base element type on both arrays.
688   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
690   if (DRD)
691     SrcAddr =
692         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
693 
694   llvm::Value *SrcBegin = nullptr;
695   if (DRD)
696     SrcBegin = SrcAddr.getPointer();
697   llvm::Value *DestBegin = DestAddr.getPointer();
698   // Cast from pointer to array type to pointer to single element.
699   llvm::Value *DestEnd =
700       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
701   // The basic structure here is a while-do loop.
702   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
704   llvm::Value *IsEmpty =
705       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707 
708   // Enter the loop body, making that address the current address.
709   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710   CGF.EmitBlock(BodyBB);
711 
712   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713 
714   llvm::PHINode *SrcElementPHI = nullptr;
715   Address SrcElementCurrent = Address::invalid();
716   if (DRD) {
717     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718                                           "omp.arraycpy.srcElementPast");
719     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720     SrcElementCurrent =
721         Address(SrcElementPHI, SrcAddr.getElementType(),
722                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723   }
724   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726   DestElementPHI->addIncoming(DestBegin, EntryBB);
727   Address DestElementCurrent =
728       Address(DestElementPHI, DestAddr.getElementType(),
729               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730 
731   // Emit copy.
732   {
733     CodeGenFunction::RunCleanupsScope InitScope(CGF);
734     if (EmitDeclareReductionInit) {
735       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736                                        SrcElementCurrent, ElementTy);
737     } else
738       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739                            /*IsInitializer=*/false);
740   }
741 
742   if (DRD) {
743     // Shift the address forward by one element.
744     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
746         "omp.arraycpy.dest.element");
747     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748   }
749 
750   // Shift the address forward by one element.
751   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
753       "omp.arraycpy.dest.element");
754   // Check whether we've reached the end.
755   llvm::Value *Done =
756       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
757   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
758   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
759 
760   // Done.
761   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
762 }
763 
764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765   return CGF.EmitOMPSharedLValue(E);
766 }
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
827   QualType PrivateType = getPrivateType(N);
828   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
829   if (!PrivateType->isVariablyModifiedType()) {
830     Sizes.emplace_back(
831         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832         nullptr);
833     return;
834   }
835   llvm::Value *Size;
836   llvm::Value *SizeInChars;
837   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
838   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
839   if (AsArraySection) {
840     Size = CGF.Builder.CreatePtrDiff(ElemType,
841                                      OrigAddresses[N].second.getPointer(CGF),
842                                      OrigAddresses[N].first.getPointer(CGF));
843     Size = CGF.Builder.CreateNUWAdd(
844         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
845     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
846   } else {
847     SizeInChars =
848         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
849     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
850   }
851   Sizes.emplace_back(SizeInChars, Size);
852   CodeGenFunction::OpaqueValueMapping OpaqueMap(
853       CGF,
854       cast<OpaqueValueExpr>(
855           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
856       RValue::get(Size));
857   CGF.EmitVariablyModifiedType(PrivateType);
858 }
859 
860 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
861                                          llvm::Value *Size) {
862   QualType PrivateType = getPrivateType(N);
863   if (!PrivateType->isVariablyModifiedType()) {
864     assert(!Size && !Sizes[N].second &&
865            "Size should be nullptr for non-variably modified reduction "
866            "items.");
867     return;
868   }
869   CodeGenFunction::OpaqueValueMapping OpaqueMap(
870       CGF,
871       cast<OpaqueValueExpr>(
872           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873       RValue::get(Size));
874   CGF.EmitVariablyModifiedType(PrivateType);
875 }
876 
877 void ReductionCodeGen::emitInitialization(
878     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
879     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
880   assert(SharedAddresses.size() > N && "No variable was generated");
881   const auto *PrivateVD =
882       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883   const OMPDeclareReductionDecl *DRD =
884       getReductionInit(ClausesData[N].ReductionOp);
885   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
886     if (DRD && DRD->getInitializer())
887       (void)DefaultInit(CGF);
888     emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
889   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
890     (void)DefaultInit(CGF);
891     QualType SharedType = SharedAddresses[N].first.getType();
892     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
893                                      PrivateAddr, SharedAddr, SharedType);
894   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897                          PrivateVD->getType().getQualifiers(),
898                          /*IsInitializer=*/false);
899   }
900 }
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   QualType PrivateType = getPrivateType(N);
904   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905   return DTorKind != QualType::DK_none;
906 }
907 
908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909                                     Address PrivateAddr) {
910   QualType PrivateType = getPrivateType(N);
911   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912   if (needCleanups(N)) {
913     PrivateAddr = CGF.Builder.CreateElementBitCast(
914         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916   }
917 }
918 
919 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
920                           LValue BaseLV) {
921   BaseTy = BaseTy.getNonReferenceType();
922   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
923          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
924     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
925       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
926     } else {
927       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
928       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
929     }
930     BaseTy = BaseTy->getPointeeType();
931   }
932   return CGF.MakeAddrLValue(
933       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
934                                        CGF.ConvertTypeForMem(ElTy)),
935       BaseLV.getType(), BaseLV.getBaseInfo(),
936       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
937 }
938 
939 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
940                           Address OriginalBaseAddress, llvm::Value *Addr) {
941   Address Tmp = Address::invalid();
942   Address TopTmp = Address::invalid();
943   Address MostTopTmp = Address::invalid();
944   BaseTy = BaseTy.getNonReferenceType();
945   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
946          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
947     Tmp = CGF.CreateMemTemp(BaseTy);
948     if (TopTmp.isValid())
949       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
950     else
951       MostTopTmp = Tmp;
952     TopTmp = Tmp;
953     BaseTy = BaseTy->getPointeeType();
954   }
955 
956   if (Tmp.isValid()) {
957     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
958         Addr, Tmp.getElementType());
959     CGF.Builder.CreateStore(Addr, Tmp);
960     return MostTopTmp;
961   }
962 
963   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
964       Addr, OriginalBaseAddress.getType());
965   return OriginalBaseAddress.withPointer(Addr);
966 }
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969   const VarDecl *OrigVD = nullptr;
970   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973       Base = TempOASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   }
985   return OrigVD;
986 }
987 
988 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
989                                                Address PrivateAddr) {
990   const DeclRefExpr *DE;
991   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
992     BaseDecls.emplace_back(OrigVD);
993     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
994     LValue BaseLValue =
995         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
996                     OriginalBaseLValue);
997     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
998     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
999         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1000         SharedAddr.getPointer());
1001     llvm::Value *PrivatePointer =
1002         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1003             PrivateAddr.getPointer(), SharedAddr.getType());
1004     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1005         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1006     return castToBase(CGF, OrigVD->getType(),
1007                       SharedAddresses[N].first.getType(),
1008                       OriginalBaseLValue.getAddress(CGF), Ptr);
1009   }
1010   BaseDecls.emplace_back(
1011       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1012   return PrivateAddr;
1013 }
1014 
1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016   const OMPDeclareReductionDecl *DRD =
1017       getReductionInit(ClausesData[N].ReductionOp);
1018   return DRD && DRD->getInitializer();
1019 }
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022   return CGF.EmitLoadOfPointerLValue(
1023       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024       getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
1027 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1028   if (!CGF.HaveInsertPoint())
1029     return;
1030   // 1.2.2 OpenMP Language Terminology
1031   // Structured block - An executable statement with a single entry at the
1032   // top and a single exit at the bottom.
1033   // The point of exit cannot be a branch out of the structured block.
1034   // longjmp() and throw() must not violate the entry/exit criteria.
1035   CGF.EHStack.pushTerminate();
1036   if (S)
1037     CGF.incrementProfileCounter(S);
1038   CodeGen(CGF);
1039   CGF.EHStack.popTerminate();
1040 }
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061                                  StringRef Separator)
1062     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1064   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1065 
1066   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1067   OMPBuilder.initialize();
1068   loadOffloadInfoMetadata();
1069 }
1070 
1071 void CGOpenMPRuntime::clear() {
1072   InternalVars.clear();
1073   // Clean non-target variable declarations possibly used only in debug info.
1074   for (const auto &Data : EmittedNonTargetVariables) {
1075     if (!Data.getValue().pointsToAliveValue())
1076       continue;
1077     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078     if (!GV)
1079       continue;
1080     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081       continue;
1082     GV->eraseFromParent();
1083   }
1084 }
1085 
1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087   SmallString<128> Buffer;
1088   llvm::raw_svector_ostream OS(Buffer);
1089   StringRef Sep = FirstSeparator;
1090   for (StringRef Part : Parts) {
1091     OS << Sep << Part;
1092     Sep = Separator;
1093   }
1094   return std::string(OS.str());
1095 }
1096 
1097 static llvm::Function *
1098 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1099                           const Expr *CombinerInitializer, const VarDecl *In,
1100                           const VarDecl *Out, bool IsCombiner) {
1101   // void .omp_combiner.(Ty *in, Ty *out);
1102   ASTContext &C = CGM.getContext();
1103   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1104   FunctionArgList Args;
1105   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1106                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1107   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1108                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1109   Args.push_back(&OmpOutParm);
1110   Args.push_back(&OmpInParm);
1111   const CGFunctionInfo &FnInfo =
1112       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1113   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1114   std::string Name = CGM.getOpenMPRuntime().getName(
1115       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1116   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1117                                     Name, &CGM.getModule());
1118   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1119   if (CGM.getLangOpts().Optimize) {
1120     Fn->removeFnAttr(llvm::Attribute::NoInline);
1121     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1122     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1123   }
1124   CodeGenFunction CGF(CGM);
1125   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1126   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1127   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1128                     Out->getLocation());
1129   CodeGenFunction::OMPPrivateScope Scope(CGF);
1130   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1131   Scope.addPrivate(
1132       In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1133               .getAddress(CGF));
1134   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1135   Scope.addPrivate(
1136       Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1137                .getAddress(CGF));
1138   (void)Scope.Privatize();
1139   if (!IsCombiner && Out->hasInit() &&
1140       !CGF.isTrivialInitializer(Out->getInit())) {
1141     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1142                          Out->getType().getQualifiers(),
1143                          /*IsInitializer=*/true);
1144   }
1145   if (CombinerInitializer)
1146     CGF.EmitIgnoredExpr(CombinerInitializer);
1147   Scope.ForceCleanup();
1148   CGF.FinishFunction();
1149   return Fn;
1150 }
1151 
1152 void CGOpenMPRuntime::emitUserDefinedReduction(
1153     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1154   if (UDRMap.count(D) > 0)
1155     return;
1156   llvm::Function *Combiner = emitCombinerOrInitializer(
1157       CGM, D->getType(), D->getCombiner(),
1158       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1159       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1160       /*IsCombiner=*/true);
1161   llvm::Function *Initializer = nullptr;
1162   if (const Expr *Init = D->getInitializer()) {
1163     Initializer = emitCombinerOrInitializer(
1164         CGM, D->getType(),
1165         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1166                                                                      : nullptr,
1167         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1168         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1169         /*IsCombiner=*/false);
1170   }
1171   UDRMap.try_emplace(D, Combiner, Initializer);
1172   if (CGF) {
1173     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1174     Decls.second.push_back(D);
1175   }
1176 }
1177 
1178 std::pair<llvm::Function *, llvm::Function *>
1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180   auto I = UDRMap.find(D);
1181   if (I != UDRMap.end())
1182     return I->second;
1183   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184   return UDRMap.lookup(D);
1185 }
1186 
1187 namespace {
1188 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1189 // Builder if one is present.
1190 struct PushAndPopStackRAII {
1191   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1192                       bool HasCancel, llvm::omp::Directive Kind)
1193       : OMPBuilder(OMPBuilder) {
1194     if (!OMPBuilder)
1195       return;
1196 
1197     // The following callback is the crucial part of clangs cleanup process.
1198     //
1199     // NOTE:
1200     // Once the OpenMPIRBuilder is used to create parallel regions (and
1201     // similar), the cancellation destination (Dest below) is determined via
1202     // IP. That means if we have variables to finalize we split the block at IP,
1203     // use the new block (=BB) as destination to build a JumpDest (via
1204     // getJumpDestInCurrentScope(BB)) which then is fed to
1205     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1206     // to push & pop an FinalizationInfo object.
1207     // The FiniCB will still be needed but at the point where the
1208     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1209     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1210       assert(IP.getBlock()->end() == IP.getPoint() &&
1211              "Clang CG should cause non-terminated block!");
1212       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1213       CGF.Builder.restoreIP(IP);
1214       CodeGenFunction::JumpDest Dest =
1215           CGF.getOMPCancelDestination(OMPD_parallel);
1216       CGF.EmitBranchThroughCleanup(Dest);
1217     };
1218 
1219     // TODO: Remove this once we emit parallel regions through the
1220     //       OpenMPIRBuilder as it can do this setup internally.
1221     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1222     OMPBuilder->pushFinalizationCB(std::move(FI));
1223   }
1224   ~PushAndPopStackRAII() {
1225     if (OMPBuilder)
1226       OMPBuilder->popFinalizationCB();
1227   }
1228   llvm::OpenMPIRBuilder *OMPBuilder;
1229 };
1230 } // namespace
1231 
1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236   assert(ThreadIDVar->getType()->isPointerType() &&
1237          "thread id variable must be of type kmp_int32 *");
1238   CodeGenFunction CGF(CGM, true);
1239   bool HasCancel = false;
1240   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241     HasCancel = OPD->hasCancel();
1242   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243     HasCancel = OPD->hasCancel();
1244   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245     HasCancel = OPSD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249     HasCancel = OPFD->hasCancel();
1250   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251     HasCancel = OPFD->hasCancel();
1252   else if (const auto *OPFD =
1253                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD =
1256                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257     HasCancel = OPFD->hasCancel();
1258 
1259   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260   //       parallel region to make cancellation barriers work properly.
1261   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264                                     HasCancel, OutlinedHelperName);
1265   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267 }
1268 
1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275 }
1276 
1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281   return emitParallelOrTeamsOutlinedFunction(
1282       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283 }
1284 
1285 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1286     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1287     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1289     bool Tied, unsigned &NumberOfParts) {
1290   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1291                                               PrePostActionTy &) {
1292     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1293     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1294     llvm::Value *TaskArgs[] = {
1295         UpLoc, ThreadID,
1296         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1297                                     TaskTVar->getType()->castAs<PointerType>())
1298             .getPointer(CGF)};
1299     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1300                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1301                         TaskArgs);
1302   };
1303   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1304                                                             UntiedCodeGen);
1305   CodeGen.setAction(Action);
1306   assert(!ThreadIDVar->getType()->isPointerType() &&
1307          "thread id variable must be of type kmp_int32 for tasks");
1308   const OpenMPDirectiveKind Region =
1309       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1310                                                       : OMPD_task;
1311   const CapturedStmt *CS = D.getCapturedStmt(Region);
1312   bool HasCancel = false;
1313   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1314     HasCancel = TD->hasCancel();
1315   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1316     HasCancel = TD->hasCancel();
1317   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1318     HasCancel = TD->hasCancel();
1319   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1320     HasCancel = TD->hasCancel();
1321 
1322   CodeGenFunction CGF(CGM, true);
1323   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1324                                         InnermostKind, HasCancel, Action);
1325   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1326   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1327   if (!Tied)
1328     NumberOfParts = Action.getNumberOfParts();
1329   return Res;
1330 }
1331 
1332 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1333                                              bool AtCurrentPoint) {
1334   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1335   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1336 
1337   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1338   if (AtCurrentPoint) {
1339     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1340         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1341   } else {
1342     Elem.second.ServiceInsertPt =
1343         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1344     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1345   }
1346 }
1347 
1348 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1349   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1350   if (Elem.second.ServiceInsertPt) {
1351     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1352     Elem.second.ServiceInsertPt = nullptr;
1353     Ptr->eraseFromParent();
1354   }
1355 }
1356 
1357 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1358                                                   SourceLocation Loc,
1359                                                   SmallString<128> &Buffer) {
1360   llvm::raw_svector_ostream OS(Buffer);
1361   // Build debug location
1362   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1363   OS << ";" << PLoc.getFilename() << ";";
1364   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1365     OS << FD->getQualifiedNameAsString();
1366   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1367   return OS.str();
1368 }
1369 
1370 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1371                                                  SourceLocation Loc,
1372                                                  unsigned Flags) {
1373   uint32_t SrcLocStrSize;
1374   llvm::Constant *SrcLocStr;
1375   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1376       Loc.isInvalid()) {
1377     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1378   } else {
1379     std::string FunctionName;
1380     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1381       FunctionName = FD->getQualifiedNameAsString();
1382     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1383     const char *FileName = PLoc.getFilename();
1384     unsigned Line = PLoc.getLine();
1385     unsigned Column = PLoc.getColumn();
1386     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1387                                                 Column, SrcLocStrSize);
1388   }
1389   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1390   return OMPBuilder.getOrCreateIdent(
1391       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1392 }
1393 
1394 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1395                                           SourceLocation Loc) {
1396   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1397   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1398   // the clang invariants used below might be broken.
1399   if (CGM.getLangOpts().OpenMPIRBuilder) {
1400     SmallString<128> Buffer;
1401     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1402     uint32_t SrcLocStrSize;
1403     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1404         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1405     return OMPBuilder.getOrCreateThreadID(
1406         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1407   }
1408 
1409   llvm::Value *ThreadID = nullptr;
1410   // Check whether we've already cached a load of the thread id in this
1411   // function.
1412   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1413   if (I != OpenMPLocThreadIDMap.end()) {
1414     ThreadID = I->second.ThreadID;
1415     if (ThreadID != nullptr)
1416       return ThreadID;
1417   }
1418   // If exceptions are enabled, do not use parameter to avoid possible crash.
1419   if (auto *OMPRegionInfo =
1420           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1421     if (OMPRegionInfo->getThreadIDVariable()) {
1422       // Check if this an outlined function with thread id passed as argument.
1423       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1424       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1425       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1426           !CGF.getLangOpts().CXXExceptions ||
1427           CGF.Builder.GetInsertBlock() == TopBlock ||
1428           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1429           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1430               TopBlock ||
1431           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1432               CGF.Builder.GetInsertBlock()) {
1433         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1434         // If value loaded in entry block, cache it and use it everywhere in
1435         // function.
1436         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1437           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1438           Elem.second.ThreadID = ThreadID;
1439         }
1440         return ThreadID;
1441       }
1442     }
1443   }
1444 
1445   // This is not an outlined function region - need to call __kmpc_int32
1446   // kmpc_global_thread_num(ident_t *loc).
1447   // Generate thread id value and cache this value for use across the
1448   // function.
1449   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1450   if (!Elem.second.ServiceInsertPt)
1451     setLocThreadIdInsertPt(CGF);
1452   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1453   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1454   llvm::CallInst *Call = CGF.Builder.CreateCall(
1455       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1456                                             OMPRTL___kmpc_global_thread_num),
1457       emitUpdateLocation(CGF, Loc));
1458   Call->setCallingConv(CGF.getRuntimeCC());
1459   Elem.second.ThreadID = Call;
1460   return Call;
1461 }
1462 
1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1466     clearLocThreadIdInsertPt(CGF);
1467     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468   }
1469   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471       UDRMap.erase(D);
1472     FunctionUDRMap.erase(CGF.CurFn);
1473   }
1474   auto I = FunctionUDMMap.find(CGF.CurFn);
1475   if (I != FunctionUDMMap.end()) {
1476     for(const auto *D : I->second)
1477       UDMMap.erase(D);
1478     FunctionUDMMap.erase(I);
1479   }
1480   LastprivateConditionalToTypes.erase(CGF.CurFn);
1481   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483 
1484 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1485   return OMPBuilder.IdentPtr;
1486 }
1487 
1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489   if (!Kmpc_MicroTy) {
1490     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494   }
1495   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496 }
1497 
1498 llvm::FunctionCallee
1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1500                                              bool IsGPUDistribute) {
1501   assert((IVSize == 32 || IVSize == 64) &&
1502          "IV size is not compatible with the omp runtime");
1503   StringRef Name;
1504   if (IsGPUDistribute)
1505     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1506                                     : "__kmpc_distribute_static_init_4u")
1507                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1508                                     : "__kmpc_distribute_static_init_8u");
1509   else
1510     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1511                                     : "__kmpc_for_static_init_4u")
1512                         : (IVSigned ? "__kmpc_for_static_init_8"
1513                                     : "__kmpc_for_static_init_8u");
1514 
1515   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1516   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1517   llvm::Type *TypeParams[] = {
1518     getIdentTyPointerTy(),                     // loc
1519     CGM.Int32Ty,                               // tid
1520     CGM.Int32Ty,                               // schedtype
1521     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1522     PtrTy,                                     // p_lower
1523     PtrTy,                                     // p_upper
1524     PtrTy,                                     // p_stride
1525     ITy,                                       // incr
1526     ITy                                        // chunk
1527   };
1528   auto *FnTy =
1529       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1530   return CGM.CreateRuntimeFunction(FnTy, Name);
1531 }
1532 
1533 llvm::FunctionCallee
1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1535   assert((IVSize == 32 || IVSize == 64) &&
1536          "IV size is not compatible with the omp runtime");
1537   StringRef Name =
1538       IVSize == 32
1539           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1540           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1541   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1542   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1543                                CGM.Int32Ty,           // tid
1544                                CGM.Int32Ty,           // schedtype
1545                                ITy,                   // lower
1546                                ITy,                   // upper
1547                                ITy,                   // stride
1548                                ITy                    // chunk
1549   };
1550   auto *FnTy =
1551       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1552   return CGM.CreateRuntimeFunction(FnTy, Name);
1553 }
1554 
1555 llvm::FunctionCallee
1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1557   assert((IVSize == 32 || IVSize == 64) &&
1558          "IV size is not compatible with the omp runtime");
1559   StringRef Name =
1560       IVSize == 32
1561           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1562           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1563   llvm::Type *TypeParams[] = {
1564       getIdentTyPointerTy(), // loc
1565       CGM.Int32Ty,           // tid
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1579           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582   llvm::Type *TypeParams[] = {
1583     getIdentTyPointerTy(),                     // loc
1584     CGM.Int32Ty,                               // tid
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy                                      // p_stride
1589   };
1590   auto *FnTy =
1591       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1592   return CGM.CreateRuntimeFunction(FnTy, Name);
1593 }
1594 
1595 /// Obtain information that uniquely identifies a target entry. This
1596 /// consists of the file and device IDs as well as line number associated with
1597 /// the relevant entry source location.
1598 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1599                                      unsigned &DeviceID, unsigned &FileID,
1600                                      unsigned &LineNum) {
1601   SourceManager &SM = C.getSourceManager();
1602 
1603   // The loc should be always valid and have a file ID (the user cannot use
1604   // #pragma directives in macros)
1605 
1606   assert(Loc.isValid() && "Source location is expected to be always valid.");
1607 
1608   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1609   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1610 
1611   llvm::sys::fs::UniqueID ID;
1612   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1613     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1614     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1615     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1616       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1617           << PLoc.getFilename() << EC.message();
1618   }
1619 
1620   DeviceID = ID.getDevice();
1621   FileID = ID.getFile();
1622   LineNum = PLoc.getLine();
1623 }
1624 
1625 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1626   if (CGM.getLangOpts().OpenMPSimd)
1627     return Address::invalid();
1628   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1629       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1630   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1631               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1632                HasRequiresUnifiedSharedMemory))) {
1633     SmallString<64> PtrName;
1634     {
1635       llvm::raw_svector_ostream OS(PtrName);
1636       OS << CGM.getMangledName(GlobalDecl(VD));
1637       if (!VD->isExternallyVisible()) {
1638         unsigned DeviceID, FileID, Line;
1639         getTargetEntryUniqueInfo(CGM.getContext(),
1640                                  VD->getCanonicalDecl()->getBeginLoc(),
1641                                  DeviceID, FileID, Line);
1642         OS << llvm::format("_%x", FileID);
1643       }
1644       OS << "_decl_tgt_ref_ptr";
1645     }
1646     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1647     QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1648     llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1649     if (!Ptr) {
1650       Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1651 
1652       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1653       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1654 
1655       if (!CGM.getLangOpts().OpenMPIsDevice)
1656         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1657       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1658     }
1659     return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1660   }
1661   return Address::invalid();
1662 }
1663 
1664 llvm::Constant *
1665 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1666   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1667          !CGM.getContext().getTargetInfo().isTLSSupported());
1668   // Lookup the entry, lazily creating it if necessary.
1669   std::string Suffix = getName({"cache", ""});
1670   return getOrCreateInternalVariable(
1671       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1672 }
1673 
1674 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1675                                                 const VarDecl *VD,
1676                                                 Address VDAddr,
1677                                                 SourceLocation Loc) {
1678   if (CGM.getLangOpts().OpenMPUseTLS &&
1679       CGM.getContext().getTargetInfo().isTLSSupported())
1680     return VDAddr;
1681 
1682   llvm::Type *VarTy = VDAddr.getElementType();
1683   llvm::Value *Args[] = {
1684       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1685       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1686       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1687       getOrCreateThreadPrivateCache(VD)};
1688   return Address(
1689       CGF.EmitRuntimeCall(
1690           OMPBuilder.getOrCreateRuntimeFunction(
1691               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1692           Args),
1693       CGF.Int8Ty, VDAddr.getAlignment());
1694 }
1695 
1696 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1697     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1698     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1699   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1700   // library.
1701   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1702   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1703                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1704                       OMPLoc);
1705   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1706   // to register constructor/destructor for variable.
1707   llvm::Value *Args[] = {
1708       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1709       Ctor, CopyCtor, Dtor};
1710   CGF.EmitRuntimeCall(
1711       OMPBuilder.getOrCreateRuntimeFunction(
1712           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1713       Args);
1714 }
1715 
1716 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1717     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1718     bool PerformInit, CodeGenFunction *CGF) {
1719   if (CGM.getLangOpts().OpenMPUseTLS &&
1720       CGM.getContext().getTargetInfo().isTLSSupported())
1721     return nullptr;
1722 
1723   VD = VD->getDefinition(CGM.getContext());
1724   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1725     QualType ASTTy = VD->getType();
1726 
1727     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1728     const Expr *Init = VD->getAnyInitializer();
1729     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1730       // Generate function that re-emits the declaration's initializer into the
1731       // threadprivate copy of the variable VD
1732       CodeGenFunction CtorCGF(CGM);
1733       FunctionArgList Args;
1734       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1735                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1736                             ImplicitParamDecl::Other);
1737       Args.push_back(&Dst);
1738 
1739       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1740           CGM.getContext().VoidPtrTy, Args);
1741       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1742       std::string Name = getName({"__kmpc_global_ctor_", ""});
1743       llvm::Function *Fn =
1744           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1745       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1746                             Args, Loc, Loc);
1747       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1748           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1749           CGM.getContext().VoidPtrTy, Dst.getLocation());
1750       Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
1751       Arg = CtorCGF.Builder.CreateElementBitCast(
1752           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1753       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1754                                /*IsInitializer=*/true);
1755       ArgVal = CtorCGF.EmitLoadOfScalar(
1756           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1757           CGM.getContext().VoidPtrTy, Dst.getLocation());
1758       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1759       CtorCGF.FinishFunction();
1760       Ctor = Fn;
1761     }
1762     if (VD->getType().isDestructedType() != QualType::DK_none) {
1763       // Generate function that emits destructor call for the threadprivate copy
1764       // of the variable VD
1765       CodeGenFunction DtorCGF(CGM);
1766       FunctionArgList Args;
1767       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1768                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1769                             ImplicitParamDecl::Other);
1770       Args.push_back(&Dst);
1771 
1772       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1773           CGM.getContext().VoidTy, Args);
1774       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1775       std::string Name = getName({"__kmpc_global_dtor_", ""});
1776       llvm::Function *Fn =
1777           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1778       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1779       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1780                             Loc, Loc);
1781       // Create a scope with an artificial location for the body of this function.
1782       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1783       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1784           DtorCGF.GetAddrOfLocalVar(&Dst),
1785           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1786       DtorCGF.emitDestroy(
1787           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1788           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1789           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1790       DtorCGF.FinishFunction();
1791       Dtor = Fn;
1792     }
1793     // Do not emit init function if it is not required.
1794     if (!Ctor && !Dtor)
1795       return nullptr;
1796 
1797     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1798     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1799                                                /*isVarArg=*/false)
1800                            ->getPointerTo();
1801     // Copying constructor for the threadprivate variable.
1802     // Must be NULL - reserved by runtime, but currently it requires that this
1803     // parameter is always NULL. Otherwise it fires assertion.
1804     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1805     if (Ctor == nullptr) {
1806       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1807                                              /*isVarArg=*/false)
1808                          ->getPointerTo();
1809       Ctor = llvm::Constant::getNullValue(CtorTy);
1810     }
1811     if (Dtor == nullptr) {
1812       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1813                                              /*isVarArg=*/false)
1814                          ->getPointerTo();
1815       Dtor = llvm::Constant::getNullValue(DtorTy);
1816     }
1817     if (!CGF) {
1818       auto *InitFunctionTy =
1819           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1820       std::string Name = getName({"__omp_threadprivate_init_", ""});
1821       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1822           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1823       CodeGenFunction InitCGF(CGM);
1824       FunctionArgList ArgList;
1825       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1826                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1827                             Loc, Loc);
1828       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1829       InitCGF.FinishFunction();
1830       return InitFunction;
1831     }
1832     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1833   }
1834   return nullptr;
1835 }
1836 
1837 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1838                                                      llvm::GlobalVariable *Addr,
1839                                                      bool PerformInit) {
1840   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1841       !CGM.getLangOpts().OpenMPIsDevice)
1842     return false;
1843   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1844       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1845   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1846       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1847        HasRequiresUnifiedSharedMemory))
1848     return CGM.getLangOpts().OpenMPIsDevice;
1849   VD = VD->getDefinition(CGM.getContext());
1850   assert(VD && "Unknown VarDecl");
1851 
1852   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1853     return CGM.getLangOpts().OpenMPIsDevice;
1854 
1855   QualType ASTTy = VD->getType();
1856   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1857 
1858   // Produce the unique prefix to identify the new target regions. We use
1859   // the source location of the variable declaration which we know to not
1860   // conflict with any target region.
1861   unsigned DeviceID;
1862   unsigned FileID;
1863   unsigned Line;
1864   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1865   SmallString<128> Buffer, Out;
1866   {
1867     llvm::raw_svector_ostream OS(Buffer);
1868     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1869        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1870   }
1871 
1872   const Expr *Init = VD->getAnyInitializer();
1873   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1874     llvm::Constant *Ctor;
1875     llvm::Constant *ID;
1876     if (CGM.getLangOpts().OpenMPIsDevice) {
1877       // Generate function that re-emits the declaration's initializer into
1878       // the threadprivate copy of the variable VD
1879       CodeGenFunction CtorCGF(CGM);
1880 
1881       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1882       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1883       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1884           FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1885           llvm::GlobalValue::WeakODRLinkage);
1886       if (CGM.getTriple().isAMDGCN())
1887         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1888       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1889       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1890                             FunctionArgList(), Loc, Loc);
1891       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1892       llvm::Constant *AddrInAS0 = Addr;
1893       if (Addr->getAddressSpace() != 0)
1894         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1895             Addr, llvm::PointerType::getWithSamePointeeType(
1896                       cast<llvm::PointerType>(Addr->getType()), 0));
1897       CtorCGF.EmitAnyExprToMem(Init,
1898                                Address(AddrInAS0, Addr->getValueType(),
1899                                        CGM.getContext().getDeclAlign(VD)),
1900                                Init->getType().getQualifiers(),
1901                                /*IsInitializer=*/true);
1902       CtorCGF.FinishFunction();
1903       Ctor = Fn;
1904       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1905     } else {
1906       Ctor = new llvm::GlobalVariable(
1907           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1908           llvm::GlobalValue::PrivateLinkage,
1909           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1910       ID = Ctor;
1911     }
1912 
1913     // Register the information for the entry associated with the constructor.
1914     Out.clear();
1915     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1916         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1917         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1918   }
1919   if (VD->getType().isDestructedType() != QualType::DK_none) {
1920     llvm::Constant *Dtor;
1921     llvm::Constant *ID;
1922     if (CGM.getLangOpts().OpenMPIsDevice) {
1923       // Generate function that emits destructor call for the threadprivate
1924       // copy of the variable VD
1925       CodeGenFunction DtorCGF(CGM);
1926 
1927       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1928       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1929       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1930           FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1931           llvm::GlobalValue::WeakODRLinkage);
1932       if (CGM.getTriple().isAMDGCN())
1933         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1934       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1935       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1936                             FunctionArgList(), Loc, Loc);
1937       // Create a scope with an artificial location for the body of this
1938       // function.
1939       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1940       llvm::Constant *AddrInAS0 = Addr;
1941       if (Addr->getAddressSpace() != 0)
1942         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1943             Addr, llvm::PointerType::getWithSamePointeeType(
1944                       cast<llvm::PointerType>(Addr->getType()), 0));
1945       DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1946                                   CGM.getContext().getDeclAlign(VD)),
1947                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1948                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1949       DtorCGF.FinishFunction();
1950       Dtor = Fn;
1951       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1952     } else {
1953       Dtor = new llvm::GlobalVariable(
1954           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1955           llvm::GlobalValue::PrivateLinkage,
1956           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1957       ID = Dtor;
1958     }
1959     // Register the information for the entry associated with the destructor.
1960     Out.clear();
1961     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1962         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1963         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1964   }
1965   return CGM.getLangOpts().OpenMPIsDevice;
1966 }
1967 
1968 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1969                                                           QualType VarType,
1970                                                           StringRef Name) {
1971   std::string Suffix = getName({"artificial", ""});
1972   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1973   llvm::GlobalVariable *GAddr =
1974       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1975   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1976       CGM.getTarget().isTLSSupported()) {
1977     GAddr->setThreadLocal(/*Val=*/true);
1978     return Address(GAddr, GAddr->getValueType(),
1979                    CGM.getContext().getTypeAlignInChars(VarType));
1980   }
1981   std::string CacheSuffix = getName({"cache", ""});
1982   llvm::Value *Args[] = {
1983       emitUpdateLocation(CGF, SourceLocation()),
1984       getThreadID(CGF, SourceLocation()),
1985       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1986       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1987                                 /*isSigned=*/false),
1988       getOrCreateInternalVariable(
1989           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
1990   return Address(
1991       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1992           CGF.EmitRuntimeCall(
1993               OMPBuilder.getOrCreateRuntimeFunction(
1994                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1995               Args),
1996           VarLVType->getPointerTo(/*AddrSpace=*/0)),
1997       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1998 }
1999 
2000 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2001                                    const RegionCodeGenTy &ThenGen,
2002                                    const RegionCodeGenTy &ElseGen) {
2003   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2004 
2005   // If the condition constant folds and can be elided, try to avoid emitting
2006   // the condition and the dead arm of the if/else.
2007   bool CondConstant;
2008   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2009     if (CondConstant)
2010       ThenGen(CGF);
2011     else
2012       ElseGen(CGF);
2013     return;
2014   }
2015 
2016   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2017   // emit the conditional branch.
2018   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2019   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2020   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2021   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2022 
2023   // Emit the 'then' code.
2024   CGF.EmitBlock(ThenBlock);
2025   ThenGen(CGF);
2026   CGF.EmitBranch(ContBlock);
2027   // Emit the 'else' code if present.
2028   // There is no need to emit line number for unconditional branch.
2029   (void)ApplyDebugLocation::CreateEmpty(CGF);
2030   CGF.EmitBlock(ElseBlock);
2031   ElseGen(CGF);
2032   // There is no need to emit line number for unconditional branch.
2033   (void)ApplyDebugLocation::CreateEmpty(CGF);
2034   CGF.EmitBranch(ContBlock);
2035   // Emit the continuation block for code after the if.
2036   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2037 }
2038 
2039 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2040                                        llvm::Function *OutlinedFn,
2041                                        ArrayRef<llvm::Value *> CapturedVars,
2042                                        const Expr *IfCond,
2043                                        llvm::Value *NumThreads) {
2044   if (!CGF.HaveInsertPoint())
2045     return;
2046   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2047   auto &M = CGM.getModule();
2048   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2049                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2050     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2051     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2052     llvm::Value *Args[] = {
2053         RTLoc,
2054         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2055         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2056     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2057     RealArgs.append(std::begin(Args), std::end(Args));
2058     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2059 
2060     llvm::FunctionCallee RTLFn =
2061         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2062     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2063   };
2064   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2065                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2068     // Build calls:
2069     // __kmpc_serialized_parallel(&Loc, GTid);
2070     llvm::Value *Args[] = {RTLoc, ThreadID};
2071     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2072                             M, OMPRTL___kmpc_serialized_parallel),
2073                         Args);
2074 
2075     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2076     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2077     Address ZeroAddrBound =
2078         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2079                                          /*Name=*/".bound.zero.addr");
2080     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2081     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2082     // ThreadId for serialized parallels is 0.
2083     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2084     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2085     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2086 
2087     // Ensure we do not inline the function. This is trivially true for the ones
2088     // passed to __kmpc_fork_call but the ones called in serialized regions
2089     // could be inlined. This is not a perfect but it is closer to the invariant
2090     // we want, namely, every data environment starts with a new function.
2091     // TODO: We should pass the if condition to the runtime function and do the
2092     //       handling there. Much cleaner code.
2093     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2094     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2095     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2096 
2097     // __kmpc_end_serialized_parallel(&Loc, GTid);
2098     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2099     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2100                             M, OMPRTL___kmpc_end_serialized_parallel),
2101                         EndArgs);
2102   };
2103   if (IfCond) {
2104     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2105   } else {
2106     RegionCodeGenTy ThenRCG(ThenGen);
2107     ThenRCG(CGF);
2108   }
2109 }
2110 
2111 // If we're inside an (outlined) parallel region, use the region info's
2112 // thread-ID variable (it is passed in a first argument of the outlined function
2113 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2114 // regular serial code region, get thread ID by calling kmp_int32
2115 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2116 // return the address of that temp.
2117 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2118                                              SourceLocation Loc) {
2119   if (auto *OMPRegionInfo =
2120           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2121     if (OMPRegionInfo->getThreadIDVariable())
2122       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123 
2124   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2125   QualType Int32Ty =
2126       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2127   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2128   CGF.EmitStoreOfScalar(ThreadID,
2129                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130 
2131   return ThreadIDTemp;
2132 }
2133 
2134 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2135     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2136   SmallString<256> Buffer;
2137   llvm::raw_svector_ostream Out(Buffer);
2138   Out << Name;
2139   StringRef RuntimeName = Out.str();
2140   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2141   if (Elem.second) {
2142     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2143            "OMP internal variable has different type than requested");
2144     return &*Elem.second;
2145   }
2146 
2147   return Elem.second = new llvm::GlobalVariable(
2148              CGM.getModule(), Ty, /*IsConstant*/ false,
2149              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2150              Elem.first(), /*InsertBefore=*/nullptr,
2151              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2152 }
2153 
2154 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2155   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2156   std::string Name = getName({Prefix, "var"});
2157   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2158 }
2159 
2160 namespace {
2161 /// Common pre(post)-action for different OpenMP constructs.
2162 class CommonActionTy final : public PrePostActionTy {
2163   llvm::FunctionCallee EnterCallee;
2164   ArrayRef<llvm::Value *> EnterArgs;
2165   llvm::FunctionCallee ExitCallee;
2166   ArrayRef<llvm::Value *> ExitArgs;
2167   bool Conditional;
2168   llvm::BasicBlock *ContBlock = nullptr;
2169 
2170 public:
2171   CommonActionTy(llvm::FunctionCallee EnterCallee,
2172                  ArrayRef<llvm::Value *> EnterArgs,
2173                  llvm::FunctionCallee ExitCallee,
2174                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2175       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2176         ExitArgs(ExitArgs), Conditional(Conditional) {}
2177   void Enter(CodeGenFunction &CGF) override {
2178     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2179     if (Conditional) {
2180       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2181       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2182       ContBlock = CGF.createBasicBlock("omp_if.end");
2183       // Generate the branch (If-stmt)
2184       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2185       CGF.EmitBlock(ThenBlock);
2186     }
2187   }
2188   void Done(CodeGenFunction &CGF) {
2189     // Emit the rest of blocks/branches
2190     CGF.EmitBranch(ContBlock);
2191     CGF.EmitBlock(ContBlock, true);
2192   }
2193   void Exit(CodeGenFunction &CGF) override {
2194     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2195   }
2196 };
2197 } // anonymous namespace
2198 
2199 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2200                                          StringRef CriticalName,
2201                                          const RegionCodeGenTy &CriticalOpGen,
2202                                          SourceLocation Loc, const Expr *Hint) {
2203   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2204   // CriticalOpGen();
2205   // __kmpc_end_critical(ident_t *, gtid, Lock);
2206   // Prepare arguments and build a call to __kmpc_critical
2207   if (!CGF.HaveInsertPoint())
2208     return;
2209   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2210                          getCriticalRegionLock(CriticalName)};
2211   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2212                                                 std::end(Args));
2213   if (Hint) {
2214     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2215         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2216   }
2217   CommonActionTy Action(
2218       OMPBuilder.getOrCreateRuntimeFunction(
2219           CGM.getModule(),
2220           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2221       EnterArgs,
2222       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2223                                             OMPRTL___kmpc_end_critical),
2224       Args);
2225   CriticalOpGen.setAction(Action);
2226   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2227 }
2228 
2229 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2230                                        const RegionCodeGenTy &MasterOpGen,
2231                                        SourceLocation Loc) {
2232   if (!CGF.HaveInsertPoint())
2233     return;
2234   // if(__kmpc_master(ident_t *, gtid)) {
2235   //   MasterOpGen();
2236   //   __kmpc_end_master(ident_t *, gtid);
2237   // }
2238   // Prepare arguments and build a call to __kmpc_master
2239   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2240   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2241                             CGM.getModule(), OMPRTL___kmpc_master),
2242                         Args,
2243                         OMPBuilder.getOrCreateRuntimeFunction(
2244                             CGM.getModule(), OMPRTL___kmpc_end_master),
2245                         Args,
2246                         /*Conditional=*/true);
2247   MasterOpGen.setAction(Action);
2248   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2249   Action.Done(CGF);
2250 }
2251 
2252 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2253                                        const RegionCodeGenTy &MaskedOpGen,
2254                                        SourceLocation Loc, const Expr *Filter) {
2255   if (!CGF.HaveInsertPoint())
2256     return;
2257   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2258   //   MaskedOpGen();
2259   //   __kmpc_end_masked(iden_t *, gtid);
2260   // }
2261   // Prepare arguments and build a call to __kmpc_masked
2262   llvm::Value *FilterVal = Filter
2263                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2264                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2265   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2266                          FilterVal};
2267   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2268                             getThreadID(CGF, Loc)};
2269   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_masked),
2271                         Args,
2272                         OMPBuilder.getOrCreateRuntimeFunction(
2273                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2274                         ArgsEnd,
2275                         /*Conditional=*/true);
2276   MaskedOpGen.setAction(Action);
2277   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2278   Action.Done(CGF);
2279 }
2280 
2281 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2282                                         SourceLocation Loc) {
2283   if (!CGF.HaveInsertPoint())
2284     return;
2285   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2286     OMPBuilder.createTaskyield(CGF.Builder);
2287   } else {
2288     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2289     llvm::Value *Args[] = {
2290         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2291         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2292     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2293                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2294                         Args);
2295   }
2296 
2297   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2298     Region->emitUntiedSwitch(CGF);
2299 }
2300 
2301 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2302                                           const RegionCodeGenTy &TaskgroupOpGen,
2303                                           SourceLocation Loc) {
2304   if (!CGF.HaveInsertPoint())
2305     return;
2306   // __kmpc_taskgroup(ident_t *, gtid);
2307   // TaskgroupOpGen();
2308   // __kmpc_end_taskgroup(ident_t *, gtid);
2309   // Prepare arguments and build a call to __kmpc_taskgroup
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2313                         Args,
2314                         OMPBuilder.getOrCreateRuntimeFunction(
2315                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2316                         Args);
2317   TaskgroupOpGen.setAction(Action);
2318   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2319 }
2320 
2321 /// Given an array of pointers to variables, project the address of a
2322 /// given variable.
2323 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2324                                       unsigned Index, const VarDecl *Var) {
2325   // Pull out the pointer to the variable.
2326   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2327   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2328 
2329   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2330   return Address(
2331       CGF.Builder.CreateBitCast(
2332           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2333       ElemTy, CGF.getContext().getDeclAlign(Var));
2334 }
2335 
2336 static llvm::Value *emitCopyprivateCopyFunction(
2337     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2338     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2339     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2340     SourceLocation Loc) {
2341   ASTContext &C = CGM.getContext();
2342   // void copy_func(void *LHSArg, void *RHSArg);
2343   FunctionArgList Args;
2344   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2345                            ImplicitParamDecl::Other);
2346   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2347                            ImplicitParamDecl::Other);
2348   Args.push_back(&LHSArg);
2349   Args.push_back(&RHSArg);
2350   const auto &CGFI =
2351       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2352   std::string Name =
2353       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2354   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2355                                     llvm::GlobalValue::InternalLinkage, Name,
2356                                     &CGM.getModule());
2357   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2358   Fn->setDoesNotRecurse();
2359   CodeGenFunction CGF(CGM);
2360   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2361   // Dest = (void*[n])(LHSArg);
2362   // Src = (void*[n])(RHSArg);
2363   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2364                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2365                   ArgsElemType->getPointerTo()),
2366               ArgsElemType, CGF.getPointerAlign());
2367   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2368                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2369                   ArgsElemType->getPointerTo()),
2370               ArgsElemType, CGF.getPointerAlign());
2371   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2372   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2373   // ...
2374   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2375   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2376     const auto *DestVar =
2377         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2378     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2379 
2380     const auto *SrcVar =
2381         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2382     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2383 
2384     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2385     QualType Type = VD->getType();
2386     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2387   }
2388   CGF.FinishFunction();
2389   return Fn;
2390 }
2391 
2392 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2393                                        const RegionCodeGenTy &SingleOpGen,
2394                                        SourceLocation Loc,
2395                                        ArrayRef<const Expr *> CopyprivateVars,
2396                                        ArrayRef<const Expr *> SrcExprs,
2397                                        ArrayRef<const Expr *> DstExprs,
2398                                        ArrayRef<const Expr *> AssignmentOps) {
2399   if (!CGF.HaveInsertPoint())
2400     return;
2401   assert(CopyprivateVars.size() == SrcExprs.size() &&
2402          CopyprivateVars.size() == DstExprs.size() &&
2403          CopyprivateVars.size() == AssignmentOps.size());
2404   ASTContext &C = CGM.getContext();
2405   // int32 did_it = 0;
2406   // if(__kmpc_single(ident_t *, gtid)) {
2407   //   SingleOpGen();
2408   //   __kmpc_end_single(ident_t *, gtid);
2409   //   did_it = 1;
2410   // }
2411   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2412   // <copy_func>, did_it);
2413 
2414   Address DidIt = Address::invalid();
2415   if (!CopyprivateVars.empty()) {
2416     // int32 did_it = 0;
2417     QualType KmpInt32Ty =
2418         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2419     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2420     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2421   }
2422   // Prepare arguments and build a call to __kmpc_single
2423   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2424   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2425                             CGM.getModule(), OMPRTL___kmpc_single),
2426                         Args,
2427                         OMPBuilder.getOrCreateRuntimeFunction(
2428                             CGM.getModule(), OMPRTL___kmpc_end_single),
2429                         Args,
2430                         /*Conditional=*/true);
2431   SingleOpGen.setAction(Action);
2432   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2433   if (DidIt.isValid()) {
2434     // did_it = 1;
2435     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2436   }
2437   Action.Done(CGF);
2438   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2439   // <copy_func>, did_it);
2440   if (DidIt.isValid()) {
2441     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2442     QualType CopyprivateArrayTy = C.getConstantArrayType(
2443         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2444         /*IndexTypeQuals=*/0);
2445     // Create a list of all private variables for copyprivate.
2446     Address CopyprivateList =
2447         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2448     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2449       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2450       CGF.Builder.CreateStore(
2451           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2452               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2453               CGF.VoidPtrTy),
2454           Elem);
2455     }
2456     // Build function that copies private values from single region to all other
2457     // threads in the corresponding parallel region.
2458     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2459         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2460         SrcExprs, DstExprs, AssignmentOps, Loc);
2461     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2462     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2463         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2464     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2465     llvm::Value *Args[] = {
2466         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2467         getThreadID(CGF, Loc),        // i32 <gtid>
2468         BufSize,                      // size_t <buf_size>
2469         CL.getPointer(),              // void *<copyprivate list>
2470         CpyFn,                        // void (*) (void *, void *) <copy_func>
2471         DidItVal                      // i32 did_it
2472     };
2473     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2474                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2475                         Args);
2476   }
2477 }
2478 
2479 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2480                                         const RegionCodeGenTy &OrderedOpGen,
2481                                         SourceLocation Loc, bool IsThreads) {
2482   if (!CGF.HaveInsertPoint())
2483     return;
2484   // __kmpc_ordered(ident_t *, gtid);
2485   // OrderedOpGen();
2486   // __kmpc_end_ordered(ident_t *, gtid);
2487   // Prepare arguments and build a call to __kmpc_ordered
2488   if (IsThreads) {
2489     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2490     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2491                               CGM.getModule(), OMPRTL___kmpc_ordered),
2492                           Args,
2493                           OMPBuilder.getOrCreateRuntimeFunction(
2494                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2495                           Args);
2496     OrderedOpGen.setAction(Action);
2497     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2498     return;
2499   }
2500   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2501 }
2502 
2503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2504   unsigned Flags;
2505   if (Kind == OMPD_for)
2506     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2507   else if (Kind == OMPD_sections)
2508     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2509   else if (Kind == OMPD_single)
2510     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2511   else if (Kind == OMPD_barrier)
2512     Flags = OMP_IDENT_BARRIER_EXPL;
2513   else
2514     Flags = OMP_IDENT_BARRIER_IMPL;
2515   return Flags;
2516 }
2517 
2518 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2519     CodeGenFunction &CGF, const OMPLoopDirective &S,
2520     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2521   // Check if the loop directive is actually a doacross loop directive. In this
2522   // case choose static, 1 schedule.
2523   if (llvm::any_of(
2524           S.getClausesOfKind<OMPOrderedClause>(),
2525           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2526     ScheduleKind = OMPC_SCHEDULE_static;
2527     // Chunk size is 1 in this case.
2528     llvm::APInt ChunkSize(32, 1);
2529     ChunkExpr = IntegerLiteral::Create(
2530         CGF.getContext(), ChunkSize,
2531         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2532         SourceLocation());
2533   }
2534 }
2535 
2536 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2537                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2538                                       bool ForceSimpleCall) {
2539   // Check if we should use the OMPBuilder
2540   auto *OMPRegionInfo =
2541       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2542   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2543     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2544         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2545     return;
2546   }
2547 
2548   if (!CGF.HaveInsertPoint())
2549     return;
2550   // Build call __kmpc_cancel_barrier(loc, thread_id);
2551   // Build call __kmpc_barrier(loc, thread_id);
2552   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2553   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2554   // thread_id);
2555   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2556                          getThreadID(CGF, Loc)};
2557   if (OMPRegionInfo) {
2558     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2559       llvm::Value *Result = CGF.EmitRuntimeCall(
2560           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2561                                                 OMPRTL___kmpc_cancel_barrier),
2562           Args);
2563       if (EmitChecks) {
2564         // if (__kmpc_cancel_barrier()) {
2565         //   exit from construct;
2566         // }
2567         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2568         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2569         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2570         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2571         CGF.EmitBlock(ExitBB);
2572         //   exit from construct;
2573         CodeGenFunction::JumpDest CancelDestination =
2574             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2575         CGF.EmitBranchThroughCleanup(CancelDestination);
2576         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2577       }
2578       return;
2579     }
2580   }
2581   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2582                           CGM.getModule(), OMPRTL___kmpc_barrier),
2583                       Args);
2584 }
2585 
2586 /// Map the OpenMP loop schedule to the runtime enumeration.
2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2588                                           bool Chunked, bool Ordered) {
2589   switch (ScheduleKind) {
2590   case OMPC_SCHEDULE_static:
2591     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2592                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2593   case OMPC_SCHEDULE_dynamic:
2594     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2595   case OMPC_SCHEDULE_guided:
2596     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2597   case OMPC_SCHEDULE_runtime:
2598     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2599   case OMPC_SCHEDULE_auto:
2600     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2601   case OMPC_SCHEDULE_unknown:
2602     assert(!Chunked && "chunk was specified but schedule kind not known");
2603     return Ordered ? OMP_ord_static : OMP_sch_static;
2604   }
2605   llvm_unreachable("Unexpected runtime schedule");
2606 }
2607 
2608 /// Map the OpenMP distribute schedule to the runtime enumeration.
2609 static OpenMPSchedType
2610 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2611   // only static is allowed for dist_schedule
2612   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2613 }
2614 
2615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2616                                          bool Chunked) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619   return Schedule == OMP_sch_static;
2620 }
2621 
2622 bool CGOpenMPRuntime::isStaticNonchunked(
2623     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625   return Schedule == OMP_dist_sch_static;
2626 }
2627 
2628 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2629                                       bool Chunked) const {
2630   OpenMPSchedType Schedule =
2631       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2632   return Schedule == OMP_sch_static_chunked;
2633 }
2634 
2635 bool CGOpenMPRuntime::isStaticChunked(
2636     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2637   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2638   return Schedule == OMP_dist_sch_static_chunked;
2639 }
2640 
2641 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2642   OpenMPSchedType Schedule =
2643       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2644   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2645   return Schedule != OMP_sch_static;
2646 }
2647 
2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2649                                   OpenMPScheduleClauseModifier M1,
2650                                   OpenMPScheduleClauseModifier M2) {
2651   int Modifier = 0;
2652   switch (M1) {
2653   case OMPC_SCHEDULE_MODIFIER_monotonic:
2654     Modifier = OMP_sch_modifier_monotonic;
2655     break;
2656   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2657     Modifier = OMP_sch_modifier_nonmonotonic;
2658     break;
2659   case OMPC_SCHEDULE_MODIFIER_simd:
2660     if (Schedule == OMP_sch_static_chunked)
2661       Schedule = OMP_sch_static_balanced_chunked;
2662     break;
2663   case OMPC_SCHEDULE_MODIFIER_last:
2664   case OMPC_SCHEDULE_MODIFIER_unknown:
2665     break;
2666   }
2667   switch (M2) {
2668   case OMPC_SCHEDULE_MODIFIER_monotonic:
2669     Modifier = OMP_sch_modifier_monotonic;
2670     break;
2671   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2672     Modifier = OMP_sch_modifier_nonmonotonic;
2673     break;
2674   case OMPC_SCHEDULE_MODIFIER_simd:
2675     if (Schedule == OMP_sch_static_chunked)
2676       Schedule = OMP_sch_static_balanced_chunked;
2677     break;
2678   case OMPC_SCHEDULE_MODIFIER_last:
2679   case OMPC_SCHEDULE_MODIFIER_unknown:
2680     break;
2681   }
2682   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2683   // If the static schedule kind is specified or if the ordered clause is
2684   // specified, and if the nonmonotonic modifier is not specified, the effect is
2685   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2686   // modifier is specified, the effect is as if the nonmonotonic modifier is
2687   // specified.
2688   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2689     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2690           Schedule == OMP_sch_static_balanced_chunked ||
2691           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2692           Schedule == OMP_dist_sch_static_chunked ||
2693           Schedule == OMP_dist_sch_static))
2694       Modifier = OMP_sch_modifier_nonmonotonic;
2695   }
2696   return Schedule | Modifier;
2697 }
2698 
2699 void CGOpenMPRuntime::emitForDispatchInit(
2700     CodeGenFunction &CGF, SourceLocation Loc,
2701     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2702     bool Ordered, const DispatchRTInput &DispatchValues) {
2703   if (!CGF.HaveInsertPoint())
2704     return;
2705   OpenMPSchedType Schedule = getRuntimeSchedule(
2706       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2707   assert(Ordered ||
2708          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2709           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2710           Schedule != OMP_sch_static_balanced_chunked));
2711   // Call __kmpc_dispatch_init(
2712   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2713   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2714   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2715 
2716   // If the Chunk was not specified in the clause - use default value 1.
2717   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2718                                             : CGF.Builder.getIntN(IVSize, 1);
2719   llvm::Value *Args[] = {
2720       emitUpdateLocation(CGF, Loc),
2721       getThreadID(CGF, Loc),
2722       CGF.Builder.getInt32(addMonoNonMonoModifier(
2723           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2724       DispatchValues.LB,                                     // Lower
2725       DispatchValues.UB,                                     // Upper
2726       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2727       Chunk                                                  // Chunk
2728   };
2729   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2730 }
2731 
2732 static void emitForStaticInitCall(
2733     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2734     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2735     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2736     const CGOpenMPRuntime::StaticRTInput &Values) {
2737   if (!CGF.HaveInsertPoint())
2738     return;
2739 
2740   assert(!Values.Ordered);
2741   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2742          Schedule == OMP_sch_static_balanced_chunked ||
2743          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2744          Schedule == OMP_dist_sch_static ||
2745          Schedule == OMP_dist_sch_static_chunked);
2746 
2747   // Call __kmpc_for_static_init(
2748   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2749   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2750   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2751   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2752   llvm::Value *Chunk = Values.Chunk;
2753   if (Chunk == nullptr) {
2754     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2755             Schedule == OMP_dist_sch_static) &&
2756            "expected static non-chunked schedule");
2757     // If the Chunk was not specified in the clause - use default value 1.
2758     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2759   } else {
2760     assert((Schedule == OMP_sch_static_chunked ||
2761             Schedule == OMP_sch_static_balanced_chunked ||
2762             Schedule == OMP_ord_static_chunked ||
2763             Schedule == OMP_dist_sch_static_chunked) &&
2764            "expected static chunked schedule");
2765   }
2766   llvm::Value *Args[] = {
2767       UpdateLocation,
2768       ThreadId,
2769       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2770                                                   M2)), // Schedule type
2771       Values.IL.getPointer(),                           // &isLastIter
2772       Values.LB.getPointer(),                           // &LB
2773       Values.UB.getPointer(),                           // &UB
2774       Values.ST.getPointer(),                           // &Stride
2775       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2776       Chunk                                             // Chunk
2777   };
2778   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2779 }
2780 
2781 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2782                                         SourceLocation Loc,
2783                                         OpenMPDirectiveKind DKind,
2784                                         const OpenMPScheduleTy &ScheduleKind,
2785                                         const StaticRTInput &Values) {
2786   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2787       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2788   assert(isOpenMPWorksharingDirective(DKind) &&
2789          "Expected loop-based or sections-based directive.");
2790   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2791                                              isOpenMPLoopDirective(DKind)
2792                                                  ? OMP_IDENT_WORK_LOOP
2793                                                  : OMP_IDENT_WORK_SECTIONS);
2794   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2795   llvm::FunctionCallee StaticInitFunction =
2796       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2797   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2798   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2799                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2800 }
2801 
2802 void CGOpenMPRuntime::emitDistributeStaticInit(
2803     CodeGenFunction &CGF, SourceLocation Loc,
2804     OpenMPDistScheduleClauseKind SchedKind,
2805     const CGOpenMPRuntime::StaticRTInput &Values) {
2806   OpenMPSchedType ScheduleNum =
2807       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2808   llvm::Value *UpdatedLocation =
2809       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2810   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2811   llvm::FunctionCallee StaticInitFunction;
2812   bool isGPUDistribute =
2813       CGM.getLangOpts().OpenMPIsDevice &&
2814       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2815   StaticInitFunction = createForStaticInitFunction(
2816       Values.IVSize, Values.IVSigned, isGPUDistribute);
2817 
2818   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2819                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2820                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2821 }
2822 
2823 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2824                                           SourceLocation Loc,
2825                                           OpenMPDirectiveKind DKind) {
2826   if (!CGF.HaveInsertPoint())
2827     return;
2828   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2829   llvm::Value *Args[] = {
2830       emitUpdateLocation(CGF, Loc,
2831                          isOpenMPDistributeDirective(DKind)
2832                              ? OMP_IDENT_WORK_DISTRIBUTE
2833                              : isOpenMPLoopDirective(DKind)
2834                                    ? OMP_IDENT_WORK_LOOP
2835                                    : OMP_IDENT_WORK_SECTIONS),
2836       getThreadID(CGF, Loc)};
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2839       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2840     CGF.EmitRuntimeCall(
2841         OMPBuilder.getOrCreateRuntimeFunction(
2842             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2843         Args);
2844   else
2845     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2846                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2847                         Args);
2848 }
2849 
2850 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2851                                                  SourceLocation Loc,
2852                                                  unsigned IVSize,
2853                                                  bool IVSigned) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2858   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2859 }
2860 
2861 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2862                                           SourceLocation Loc, unsigned IVSize,
2863                                           bool IVSigned, Address IL,
2864                                           Address LB, Address UB,
2865                                           Address ST) {
2866   // Call __kmpc_dispatch_next(
2867   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2868   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2869   //          kmp_int[32|64] *p_stride);
2870   llvm::Value *Args[] = {
2871       emitUpdateLocation(CGF, Loc),
2872       getThreadID(CGF, Loc),
2873       IL.getPointer(), // &isLastIter
2874       LB.getPointer(), // &Lower
2875       UB.getPointer(), // &Upper
2876       ST.getPointer()  // &Stride
2877   };
2878   llvm::Value *Call =
2879       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2880   return CGF.EmitScalarConversion(
2881       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2882       CGF.getContext().BoolTy, Loc);
2883 }
2884 
2885 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2886                                            llvm::Value *NumThreads,
2887                                            SourceLocation Loc) {
2888   if (!CGF.HaveInsertPoint())
2889     return;
2890   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2893       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2894   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2896                       Args);
2897 }
2898 
2899 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2900                                          ProcBindKind ProcBind,
2901                                          SourceLocation Loc) {
2902   if (!CGF.HaveInsertPoint())
2903     return;
2904   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2905   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2906   llvm::Value *Args[] = {
2907       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2909   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2911                       Args);
2912 }
2913 
2914 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2915                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2916   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2917     OMPBuilder.createFlush(CGF.Builder);
2918   } else {
2919     if (!CGF.HaveInsertPoint())
2920       return;
2921     // Build call void __kmpc_flush(ident_t *loc)
2922     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                             CGM.getModule(), OMPRTL___kmpc_flush),
2924                         emitUpdateLocation(CGF, Loc));
2925   }
2926 }
2927 
2928 namespace {
2929 /// Indexes of fields for type kmp_task_t.
2930 enum KmpTaskTFields {
2931   /// List of shared variables.
2932   KmpTaskTShareds,
2933   /// Task routine.
2934   KmpTaskTRoutine,
2935   /// Partition id for the untied tasks.
2936   KmpTaskTPartId,
2937   /// Function with call of destructors for private variables.
2938   Data1,
2939   /// Task priority.
2940   Data2,
2941   /// (Taskloops only) Lower bound.
2942   KmpTaskTLowerBound,
2943   /// (Taskloops only) Upper bound.
2944   KmpTaskTUpperBound,
2945   /// (Taskloops only) Stride.
2946   KmpTaskTStride,
2947   /// (Taskloops only) Is last iteration flag.
2948   KmpTaskTLastIter,
2949   /// (Taskloops only) Reduction data.
2950   KmpTaskTReductions,
2951 };
2952 } // anonymous namespace
2953 
2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2955   return OffloadEntriesTargetRegion.empty() &&
2956          OffloadEntriesDeviceGlobalVar.empty();
2957 }
2958 
2959 /// Initialize target region entry.
2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2961     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2962                                     StringRef ParentName, unsigned LineNum,
2963                                     unsigned Order) {
2964   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2965                                              "only required for the device "
2966                                              "code generation.");
2967   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2968       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2969                                    OMPTargetRegionEntryTargetRegion);
2970   ++OffloadingEntriesNum;
2971 }
2972 
2973 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2974     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2975                                   StringRef ParentName, unsigned LineNum,
2976                                   llvm::Constant *Addr, llvm::Constant *ID,
2977                                   OMPTargetRegionEntryKind Flags) {
2978   // If we are emitting code for a target, the entry is already initialized,
2979   // only has to be registered.
2980   if (CGM.getLangOpts().OpenMPIsDevice) {
2981     // This could happen if the device compilation is invoked standalone.
2982     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2983       return;
2984     auto &Entry =
2985         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2986     Entry.setAddress(Addr);
2987     Entry.setID(ID);
2988     Entry.setFlags(Flags);
2989   } else {
2990     if (Flags ==
2991             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2992         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2993                                  /*IgnoreAddressId*/ true))
2994       return;
2995     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2996            "Target region entry already registered!");
2997     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2998     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2999     ++OffloadingEntriesNum;
3000   }
3001 }
3002 
3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3004     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3005     bool IgnoreAddressId) const {
3006   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3007   if (PerDevice == OffloadEntriesTargetRegion.end())
3008     return false;
3009   auto PerFile = PerDevice->second.find(FileID);
3010   if (PerFile == PerDevice->second.end())
3011     return false;
3012   auto PerParentName = PerFile->second.find(ParentName);
3013   if (PerParentName == PerFile->second.end())
3014     return false;
3015   auto PerLine = PerParentName->second.find(LineNum);
3016   if (PerLine == PerParentName->second.end())
3017     return false;
3018   // Fail if this entry is already registered.
3019   if (!IgnoreAddressId &&
3020       (PerLine->second.getAddress() || PerLine->second.getID()))
3021     return false;
3022   return true;
3023 }
3024 
3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3026     const OffloadTargetRegionEntryInfoActTy &Action) {
3027   // Scan all target region entries and perform the provided action.
3028   for (const auto &D : OffloadEntriesTargetRegion)
3029     for (const auto &F : D.second)
3030       for (const auto &P : F.second)
3031         for (const auto &L : P.second)
3032           Action(D.first, F.first, P.first(), L.first, L.second);
3033 }
3034 
3035 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3036     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3037                                        OMPTargetGlobalVarEntryKind Flags,
3038                                        unsigned Order) {
3039   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3040                                              "only required for the device "
3041                                              "code generation.");
3042   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3043   ++OffloadingEntriesNum;
3044 }
3045 
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3047     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3048                                      CharUnits VarSize,
3049                                      OMPTargetGlobalVarEntryKind Flags,
3050                                      llvm::GlobalValue::LinkageTypes Linkage) {
3051   if (CGM.getLangOpts().OpenMPIsDevice) {
3052     // This could happen if the device compilation is invoked standalone.
3053     if (!hasDeviceGlobalVarEntryInfo(VarName))
3054       return;
3055     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3056     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3057       if (Entry.getVarSize().isZero()) {
3058         Entry.setVarSize(VarSize);
3059         Entry.setLinkage(Linkage);
3060       }
3061       return;
3062     }
3063     Entry.setVarSize(VarSize);
3064     Entry.setLinkage(Linkage);
3065     Entry.setAddress(Addr);
3066   } else {
3067     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3068       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3069       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3070              "Entry not initialized!");
3071       if (Entry.getVarSize().isZero()) {
3072         Entry.setVarSize(VarSize);
3073         Entry.setLinkage(Linkage);
3074       }
3075       return;
3076     }
3077     OffloadEntriesDeviceGlobalVar.try_emplace(
3078         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3079     ++OffloadingEntriesNum;
3080   }
3081 }
3082 
3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3084     actOnDeviceGlobalVarEntriesInfo(
3085         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3086   // Scan all target region entries and perform the provided action.
3087   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3088     Action(E.getKey(), E.getValue());
3089 }
3090 
3091 void CGOpenMPRuntime::createOffloadEntry(
3092     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3093     llvm::GlobalValue::LinkageTypes Linkage) {
3094   OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
3095 }
3096 
3097 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3098   // Emit the offloading entries and metadata so that the device codegen side
3099   // can easily figure out what to emit. The produced metadata looks like
3100   // this:
3101   //
3102   // !omp_offload.info = !{!1, ...}
3103   //
3104   // Right now we only generate metadata for function that contain target
3105   // regions.
3106 
3107   // If we are in simd mode or there are no entries, we don't need to do
3108   // anything.
3109   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3110     return;
3111 
3112   llvm::Module &M = CGM.getModule();
3113   llvm::LLVMContext &C = M.getContext();
3114   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3115                          SourceLocation, StringRef>,
3116               16>
3117       OrderedEntries(OffloadEntriesInfoManager.size());
3118   llvm::SmallVector<StringRef, 16> ParentFunctions(
3119       OffloadEntriesInfoManager.size());
3120 
3121   // Auxiliary methods to create metadata values and strings.
3122   auto &&GetMDInt = [this](unsigned V) {
3123     return llvm::ConstantAsMetadata::get(
3124         llvm::ConstantInt::get(CGM.Int32Ty, V));
3125   };
3126 
3127   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3128 
3129   // Create the offloading info metadata node.
3130   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3131 
3132   // Create function that emits metadata for each target region entry;
3133   auto &&TargetRegionMetadataEmitter =
3134       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3135        &GetMDString](
3136           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3137           unsigned Line,
3138           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3139         // Generate metadata for target regions. Each entry of this metadata
3140         // contains:
3141         // - Entry 0 -> Kind of this type of metadata (0).
3142         // - Entry 1 -> Device ID of the file where the entry was identified.
3143         // - Entry 2 -> File ID of the file where the entry was identified.
3144         // - Entry 3 -> Mangled name of the function where the entry was
3145         // identified.
3146         // - Entry 4 -> Line in the file where the entry was identified.
3147         // - Entry 5 -> Order the entry was created.
3148         // The first element of the metadata node is the kind.
3149         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3150                                  GetMDInt(FileID),      GetMDString(ParentName),
3151                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3152 
3153         SourceLocation Loc;
3154         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3155                   E = CGM.getContext().getSourceManager().fileinfo_end();
3156              I != E; ++I) {
3157           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3158               I->getFirst()->getUniqueID().getFile() == FileID) {
3159             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3160                 I->getFirst(), Line, 1);
3161             break;
3162           }
3163         }
3164         // Save this entry in the right position of the ordered entries array.
3165         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3166         ParentFunctions[E.getOrder()] = ParentName;
3167 
3168         // Add metadata to the named metadata node.
3169         MD->addOperand(llvm::MDNode::get(C, Ops));
3170       };
3171 
3172   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3173       TargetRegionMetadataEmitter);
3174 
3175   // Create function that emits metadata for each device global variable entry;
3176   auto &&DeviceGlobalVarMetadataEmitter =
3177       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3178        MD](StringRef MangledName,
3179            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3180                &E) {
3181         // Generate metadata for global variables. Each entry of this metadata
3182         // contains:
3183         // - Entry 0 -> Kind of this type of metadata (1).
3184         // - Entry 1 -> Mangled name of the variable.
3185         // - Entry 2 -> Declare target kind.
3186         // - Entry 3 -> Order the entry was created.
3187         // The first element of the metadata node is the kind.
3188         llvm::Metadata *Ops[] = {
3189             GetMDInt(E.getKind()), GetMDString(MangledName),
3190             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3191 
3192         // Save this entry in the right position of the ordered entries array.
3193         OrderedEntries[E.getOrder()] =
3194             std::make_tuple(&E, SourceLocation(), MangledName);
3195 
3196         // Add metadata to the named metadata node.
3197         MD->addOperand(llvm::MDNode::get(C, Ops));
3198       };
3199 
3200   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3201       DeviceGlobalVarMetadataEmitter);
3202 
3203   for (const auto &E : OrderedEntries) {
3204     assert(std::get<0>(E) && "All ordered entries must exist!");
3205     if (const auto *CE =
3206             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3207                 std::get<0>(E))) {
3208       if (!CE->getID() || !CE->getAddress()) {
3209         // Do not blame the entry if the parent funtion is not emitted.
3210         StringRef FnName = ParentFunctions[CE->getOrder()];
3211         if (!CGM.GetGlobalValue(FnName))
3212           continue;
3213         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3214             DiagnosticsEngine::Error,
3215             "Offloading entry for target region in %0 is incorrect: either the "
3216             "address or the ID is invalid.");
3217         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3218         continue;
3219       }
3220       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3221                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3222     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3223                                              OffloadEntryInfoDeviceGlobalVar>(
3224                    std::get<0>(E))) {
3225       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3226           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3227               CE->getFlags());
3228       switch (Flags) {
3229       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3230         if (CGM.getLangOpts().OpenMPIsDevice &&
3231             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3232           continue;
3233         if (!CE->getAddress()) {
3234           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3235               DiagnosticsEngine::Error, "Offloading entry for declare target "
3236                                         "variable %0 is incorrect: the "
3237                                         "address is invalid.");
3238           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3239           continue;
3240         }
3241         // The vaiable has no definition - no need to add the entry.
3242         if (CE->getVarSize().isZero())
3243           continue;
3244         break;
3245       }
3246       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3247         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3248                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3249                "Declaret target link address is set.");
3250         if (CGM.getLangOpts().OpenMPIsDevice)
3251           continue;
3252         if (!CE->getAddress()) {
3253           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3254               DiagnosticsEngine::Error,
3255               "Offloading entry for declare target variable is incorrect: the "
3256               "address is invalid.");
3257           CGM.getDiags().Report(DiagID);
3258           continue;
3259         }
3260         break;
3261       }
3262 
3263       // Hidden or internal symbols on the device are not externally visible. We
3264       // should not attempt to register them by creating an offloading entry.
3265       if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
3266         if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
3267           continue;
3268 
3269       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3270                          CE->getVarSize().getQuantity(), Flags,
3271                          CE->getLinkage());
3272     } else {
3273       llvm_unreachable("Unsupported entry kind.");
3274     }
3275   }
3276 }
3277 
3278 /// Loads all the offload entries information from the host IR
3279 /// metadata.
3280 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3281   // If we are in target mode, load the metadata from the host IR. This code has
3282   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3283 
3284   if (!CGM.getLangOpts().OpenMPIsDevice)
3285     return;
3286 
3287   if (CGM.getLangOpts().OMPHostIRFile.empty())
3288     return;
3289 
3290   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3291   if (auto EC = Buf.getError()) {
3292     CGM.getDiags().Report(diag::err_cannot_open_file)
3293         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3294     return;
3295   }
3296 
3297   llvm::LLVMContext C;
3298   auto ME = expectedToErrorOrAndEmitErrors(
3299       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3300 
3301   if (auto EC = ME.getError()) {
3302     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3303         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3304     CGM.getDiags().Report(DiagID)
3305         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3306     return;
3307   }
3308 
3309   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3310   if (!MD)
3311     return;
3312 
3313   for (llvm::MDNode *MN : MD->operands()) {
3314     auto &&GetMDInt = [MN](unsigned Idx) {
3315       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3316       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3317     };
3318 
3319     auto &&GetMDString = [MN](unsigned Idx) {
3320       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3321       return V->getString();
3322     };
3323 
3324     switch (GetMDInt(0)) {
3325     default:
3326       llvm_unreachable("Unexpected metadata!");
3327       break;
3328     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3329         OffloadingEntryInfoTargetRegion:
3330       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3331           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3332           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3333           /*Order=*/GetMDInt(5));
3334       break;
3335     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3336         OffloadingEntryInfoDeviceGlobalVar:
3337       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3338           /*MangledName=*/GetMDString(1),
3339           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3340               /*Flags=*/GetMDInt(2)),
3341           /*Order=*/GetMDInt(3));
3342       break;
3343     }
3344   }
3345 }
3346 
3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3348   if (!KmpRoutineEntryPtrTy) {
3349     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3350     ASTContext &C = CGM.getContext();
3351     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3352     FunctionProtoType::ExtProtoInfo EPI;
3353     KmpRoutineEntryPtrQTy = C.getPointerType(
3354         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3355     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3356   }
3357 }
3358 
3359 namespace {
3360 struct PrivateHelpersTy {
3361   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3362                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3363       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3364         PrivateElemInit(PrivateElemInit) {}
3365   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3366   const Expr *OriginalRef = nullptr;
3367   const VarDecl *Original = nullptr;
3368   const VarDecl *PrivateCopy = nullptr;
3369   const VarDecl *PrivateElemInit = nullptr;
3370   bool isLocalPrivate() const {
3371     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3372   }
3373 };
3374 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3375 } // anonymous namespace
3376 
3377 static bool isAllocatableDecl(const VarDecl *VD) {
3378   const VarDecl *CVD = VD->getCanonicalDecl();
3379   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3380     return false;
3381   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3382   // Use the default allocation.
3383   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3384            !AA->getAllocator());
3385 }
3386 
3387 static RecordDecl *
3388 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3389   if (!Privates.empty()) {
3390     ASTContext &C = CGM.getContext();
3391     // Build struct .kmp_privates_t. {
3392     //         /*  private vars  */
3393     //       };
3394     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3395     RD->startDefinition();
3396     for (const auto &Pair : Privates) {
3397       const VarDecl *VD = Pair.second.Original;
3398       QualType Type = VD->getType().getNonReferenceType();
3399       // If the private variable is a local variable with lvalue ref type,
3400       // allocate the pointer instead of the pointee type.
3401       if (Pair.second.isLocalPrivate()) {
3402         if (VD->getType()->isLValueReferenceType())
3403           Type = C.getPointerType(Type);
3404         if (isAllocatableDecl(VD))
3405           Type = C.getPointerType(Type);
3406       }
3407       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3408       if (VD->hasAttrs()) {
3409         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3410              E(VD->getAttrs().end());
3411              I != E; ++I)
3412           FD->addAttr(*I);
3413       }
3414     }
3415     RD->completeDefinition();
3416     return RD;
3417   }
3418   return nullptr;
3419 }
3420 
3421 static RecordDecl *
3422 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3423                          QualType KmpInt32Ty,
3424                          QualType KmpRoutineEntryPointerQTy) {
3425   ASTContext &C = CGM.getContext();
3426   // Build struct kmp_task_t {
3427   //         void *              shareds;
3428   //         kmp_routine_entry_t routine;
3429   //         kmp_int32           part_id;
3430   //         kmp_cmplrdata_t data1;
3431   //         kmp_cmplrdata_t data2;
3432   // For taskloops additional fields:
3433   //         kmp_uint64          lb;
3434   //         kmp_uint64          ub;
3435   //         kmp_int64           st;
3436   //         kmp_int32           liter;
3437   //         void *              reductions;
3438   //       };
3439   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3440   UD->startDefinition();
3441   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3442   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3443   UD->completeDefinition();
3444   QualType KmpCmplrdataTy = C.getRecordType(UD);
3445   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3446   RD->startDefinition();
3447   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3448   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3449   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3450   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3451   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3452   if (isOpenMPTaskLoopDirective(Kind)) {
3453     QualType KmpUInt64Ty =
3454         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3455     QualType KmpInt64Ty =
3456         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3457     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3458     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3459     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3460     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3461     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3462   }
3463   RD->completeDefinition();
3464   return RD;
3465 }
3466 
3467 static RecordDecl *
3468 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3469                                      ArrayRef<PrivateDataTy> Privates) {
3470   ASTContext &C = CGM.getContext();
3471   // Build struct kmp_task_t_with_privates {
3472   //         kmp_task_t task_data;
3473   //         .kmp_privates_t. privates;
3474   //       };
3475   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3476   RD->startDefinition();
3477   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3478   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3479     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3480   RD->completeDefinition();
3481   return RD;
3482 }
3483 
3484 /// Emit a proxy function which accepts kmp_task_t as the second
3485 /// argument.
3486 /// \code
3487 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3488 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3489 ///   For taskloops:
3490 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3491 ///   tt->reductions, tt->shareds);
3492 ///   return 0;
3493 /// }
3494 /// \endcode
3495 static llvm::Function *
3496 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3497                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3498                       QualType KmpTaskTWithPrivatesPtrQTy,
3499                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3500                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3501                       llvm::Value *TaskPrivatesMap) {
3502   ASTContext &C = CGM.getContext();
3503   FunctionArgList Args;
3504   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3505                             ImplicitParamDecl::Other);
3506   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3507                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3508                                 ImplicitParamDecl::Other);
3509   Args.push_back(&GtidArg);
3510   Args.push_back(&TaskTypeArg);
3511   const auto &TaskEntryFnInfo =
3512       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3513   llvm::FunctionType *TaskEntryTy =
3514       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3515   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3516   auto *TaskEntry = llvm::Function::Create(
3517       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3518   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3519   TaskEntry->setDoesNotRecurse();
3520   CodeGenFunction CGF(CGM);
3521   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3522                     Loc, Loc);
3523 
3524   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3525   // tt,
3526   // For taskloops:
3527   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3528   // tt->task_data.shareds);
3529   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3530       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3531   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3532       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3533       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3534   const auto *KmpTaskTWithPrivatesQTyRD =
3535       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3536   LValue Base =
3537       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3538   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3539   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3540   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3541   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3542 
3543   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3544   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3545   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3546       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3547       CGF.ConvertTypeForMem(SharedsPtrTy));
3548 
3549   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3550   llvm::Value *PrivatesParam;
3551   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3552     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3553     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3554         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3555   } else {
3556     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3557   }
3558 
3559   llvm::Value *CommonArgs[] = {
3560       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3561       CGF.Builder
3562           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3563                                                CGF.VoidPtrTy, CGF.Int8Ty)
3564           .getPointer()};
3565   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3566                                           std::end(CommonArgs));
3567   if (isOpenMPTaskLoopDirective(Kind)) {
3568     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3569     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3570     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3571     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3572     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3573     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3574     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3575     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3576     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3577     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3578     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3579     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3580     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3581     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3582     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3583     CallArgs.push_back(LBParam);
3584     CallArgs.push_back(UBParam);
3585     CallArgs.push_back(StParam);
3586     CallArgs.push_back(LIParam);
3587     CallArgs.push_back(RParam);
3588   }
3589   CallArgs.push_back(SharedsParam);
3590 
3591   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3592                                                   CallArgs);
3593   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3594                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3595   CGF.FinishFunction();
3596   return TaskEntry;
3597 }
3598 
3599 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3600                                             SourceLocation Loc,
3601                                             QualType KmpInt32Ty,
3602                                             QualType KmpTaskTWithPrivatesPtrQTy,
3603                                             QualType KmpTaskTWithPrivatesQTy) {
3604   ASTContext &C = CGM.getContext();
3605   FunctionArgList Args;
3606   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3607                             ImplicitParamDecl::Other);
3608   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3609                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3610                                 ImplicitParamDecl::Other);
3611   Args.push_back(&GtidArg);
3612   Args.push_back(&TaskTypeArg);
3613   const auto &DestructorFnInfo =
3614       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3615   llvm::FunctionType *DestructorFnTy =
3616       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3617   std::string Name =
3618       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3619   auto *DestructorFn =
3620       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3621                              Name, &CGM.getModule());
3622   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3623                                     DestructorFnInfo);
3624   DestructorFn->setDoesNotRecurse();
3625   CodeGenFunction CGF(CGM);
3626   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3627                     Args, Loc, Loc);
3628 
3629   LValue Base = CGF.EmitLoadOfPointerLValue(
3630       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3631       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3632   const auto *KmpTaskTWithPrivatesQTyRD =
3633       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3634   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3635   Base = CGF.EmitLValueForField(Base, *FI);
3636   for (const auto *Field :
3637        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3638     if (QualType::DestructionKind DtorKind =
3639             Field->getType().isDestructedType()) {
3640       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3641       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3642     }
3643   }
3644   CGF.FinishFunction();
3645   return DestructorFn;
3646 }
3647 
3648 /// Emit a privates mapping function for correct handling of private and
3649 /// firstprivate variables.
3650 /// \code
3651 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3652 /// **noalias priv1,...,  <tyn> **noalias privn) {
3653 ///   *priv1 = &.privates.priv1;
3654 ///   ...;
3655 ///   *privn = &.privates.privn;
3656 /// }
3657 /// \endcode
3658 static llvm::Value *
3659 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3660                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3661                                ArrayRef<PrivateDataTy> Privates) {
3662   ASTContext &C = CGM.getContext();
3663   FunctionArgList Args;
3664   ImplicitParamDecl TaskPrivatesArg(
3665       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3666       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3667       ImplicitParamDecl::Other);
3668   Args.push_back(&TaskPrivatesArg);
3669   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3670   unsigned Counter = 1;
3671   for (const Expr *E : Data.PrivateVars) {
3672     Args.push_back(ImplicitParamDecl::Create(
3673         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3674         C.getPointerType(C.getPointerType(E->getType()))
3675             .withConst()
3676             .withRestrict(),
3677         ImplicitParamDecl::Other));
3678     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3679     PrivateVarsPos[VD] = Counter;
3680     ++Counter;
3681   }
3682   for (const Expr *E : Data.FirstprivateVars) {
3683     Args.push_back(ImplicitParamDecl::Create(
3684         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3685         C.getPointerType(C.getPointerType(E->getType()))
3686             .withConst()
3687             .withRestrict(),
3688         ImplicitParamDecl::Other));
3689     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3690     PrivateVarsPos[VD] = Counter;
3691     ++Counter;
3692   }
3693   for (const Expr *E : Data.LastprivateVars) {
3694     Args.push_back(ImplicitParamDecl::Create(
3695         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3696         C.getPointerType(C.getPointerType(E->getType()))
3697             .withConst()
3698             .withRestrict(),
3699         ImplicitParamDecl::Other));
3700     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3701     PrivateVarsPos[VD] = Counter;
3702     ++Counter;
3703   }
3704   for (const VarDecl *VD : Data.PrivateLocals) {
3705     QualType Ty = VD->getType().getNonReferenceType();
3706     if (VD->getType()->isLValueReferenceType())
3707       Ty = C.getPointerType(Ty);
3708     if (isAllocatableDecl(VD))
3709       Ty = C.getPointerType(Ty);
3710     Args.push_back(ImplicitParamDecl::Create(
3711         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3712         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3713         ImplicitParamDecl::Other));
3714     PrivateVarsPos[VD] = Counter;
3715     ++Counter;
3716   }
3717   const auto &TaskPrivatesMapFnInfo =
3718       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3719   llvm::FunctionType *TaskPrivatesMapTy =
3720       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3721   std::string Name =
3722       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3723   auto *TaskPrivatesMap = llvm::Function::Create(
3724       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3725       &CGM.getModule());
3726   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3727                                     TaskPrivatesMapFnInfo);
3728   if (CGM.getLangOpts().Optimize) {
3729     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3730     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3731     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3732   }
3733   CodeGenFunction CGF(CGM);
3734   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3735                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3736 
3737   // *privi = &.privates.privi;
3738   LValue Base = CGF.EmitLoadOfPointerLValue(
3739       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3740       TaskPrivatesArg.getType()->castAs<PointerType>());
3741   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3742   Counter = 0;
3743   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3744     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3745     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3746     LValue RefLVal =
3747         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3748     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3749         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3750     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3751     ++Counter;
3752   }
3753   CGF.FinishFunction();
3754   return TaskPrivatesMap;
3755 }
3756 
3757 /// Emit initialization for private variables in task-based directives.
3758 static void emitPrivatesInit(CodeGenFunction &CGF,
3759                              const OMPExecutableDirective &D,
3760                              Address KmpTaskSharedsPtr, LValue TDBase,
3761                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3762                              QualType SharedsTy, QualType SharedsPtrTy,
3763                              const OMPTaskDataTy &Data,
3764                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3765   ASTContext &C = CGF.getContext();
3766   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3767   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3768   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3769                                  ? OMPD_taskloop
3770                                  : OMPD_task;
3771   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3772   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3773   LValue SrcBase;
3774   bool IsTargetTask =
3775       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3776       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3777   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3778   // PointersArray, SizesArray, and MappersArray. The original variables for
3779   // these arrays are not captured and we get their addresses explicitly.
3780   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3781       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3782     SrcBase = CGF.MakeAddrLValue(
3783         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3784             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3785             CGF.ConvertTypeForMem(SharedsTy)),
3786         SharedsTy);
3787   }
3788   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3789   for (const PrivateDataTy &Pair : Privates) {
3790     // Do not initialize private locals.
3791     if (Pair.second.isLocalPrivate()) {
3792       ++FI;
3793       continue;
3794     }
3795     const VarDecl *VD = Pair.second.PrivateCopy;
3796     const Expr *Init = VD->getAnyInitializer();
3797     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3798                              !CGF.isTrivialInitializer(Init)))) {
3799       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3800       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3801         const VarDecl *OriginalVD = Pair.second.Original;
3802         // Check if the variable is the target-based BasePointersArray,
3803         // PointersArray, SizesArray, or MappersArray.
3804         LValue SharedRefLValue;
3805         QualType Type = PrivateLValue.getType();
3806         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3807         if (IsTargetTask && !SharedField) {
3808           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3809                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3810                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3811                          ->getNumParams() == 0 &&
3812                  isa<TranslationUnitDecl>(
3813                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3814                          ->getDeclContext()) &&
3815                  "Expected artificial target data variable.");
3816           SharedRefLValue =
3817               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3818         } else if (ForDup) {
3819           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3820           SharedRefLValue = CGF.MakeAddrLValue(
3821               SharedRefLValue.getAddress(CGF).withAlignment(
3822                   C.getDeclAlign(OriginalVD)),
3823               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3824               SharedRefLValue.getTBAAInfo());
3825         } else if (CGF.LambdaCaptureFields.count(
3826                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3827                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3828           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3829         } else {
3830           // Processing for implicitly captured variables.
3831           InlinedOpenMPRegionRAII Region(
3832               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3833               /*HasCancel=*/false, /*NoInheritance=*/true);
3834           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3835         }
3836         if (Type->isArrayType()) {
3837           // Initialize firstprivate array.
3838           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3839             // Perform simple memcpy.
3840             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3841           } else {
3842             // Initialize firstprivate array using element-by-element
3843             // initialization.
3844             CGF.EmitOMPAggregateAssign(
3845                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3846                 Type,
3847                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3848                                                   Address SrcElement) {
3849                   // Clean up any temporaries needed by the initialization.
3850                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3851                   InitScope.addPrivate(Elem, SrcElement);
3852                   (void)InitScope.Privatize();
3853                   // Emit initialization for single element.
3854                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3855                       CGF, &CapturesInfo);
3856                   CGF.EmitAnyExprToMem(Init, DestElement,
3857                                        Init->getType().getQualifiers(),
3858                                        /*IsInitializer=*/false);
3859                 });
3860           }
3861         } else {
3862           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3863           InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3864           (void)InitScope.Privatize();
3865           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3866           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3867                              /*capturedByInit=*/false);
3868         }
3869       } else {
3870         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3871       }
3872     }
3873     ++FI;
3874   }
3875 }
3876 
3877 /// Check if duplication function is required for taskloops.
3878 static bool checkInitIsRequired(CodeGenFunction &CGF,
3879                                 ArrayRef<PrivateDataTy> Privates) {
3880   bool InitRequired = false;
3881   for (const PrivateDataTy &Pair : Privates) {
3882     if (Pair.second.isLocalPrivate())
3883       continue;
3884     const VarDecl *VD = Pair.second.PrivateCopy;
3885     const Expr *Init = VD->getAnyInitializer();
3886     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3887                                     !CGF.isTrivialInitializer(Init));
3888     if (InitRequired)
3889       break;
3890   }
3891   return InitRequired;
3892 }
3893 
3894 
3895 /// Emit task_dup function (for initialization of
3896 /// private/firstprivate/lastprivate vars and last_iter flag)
3897 /// \code
3898 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3899 /// lastpriv) {
3900 /// // setup lastprivate flag
3901 ///    task_dst->last = lastpriv;
3902 /// // could be constructor calls here...
3903 /// }
3904 /// \endcode
3905 static llvm::Value *
3906 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3907                     const OMPExecutableDirective &D,
3908                     QualType KmpTaskTWithPrivatesPtrQTy,
3909                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3910                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3911                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3912                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3913   ASTContext &C = CGM.getContext();
3914   FunctionArgList Args;
3915   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3916                            KmpTaskTWithPrivatesPtrQTy,
3917                            ImplicitParamDecl::Other);
3918   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3919                            KmpTaskTWithPrivatesPtrQTy,
3920                            ImplicitParamDecl::Other);
3921   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3922                                 ImplicitParamDecl::Other);
3923   Args.push_back(&DstArg);
3924   Args.push_back(&SrcArg);
3925   Args.push_back(&LastprivArg);
3926   const auto &TaskDupFnInfo =
3927       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3928   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3929   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3930   auto *TaskDup = llvm::Function::Create(
3931       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3932   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3933   TaskDup->setDoesNotRecurse();
3934   CodeGenFunction CGF(CGM);
3935   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3936                     Loc);
3937 
3938   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3939       CGF.GetAddrOfLocalVar(&DstArg),
3940       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3941   // task_dst->liter = lastpriv;
3942   if (WithLastIter) {
3943     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3944     LValue Base = CGF.EmitLValueForField(
3945         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3946     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3947     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3948         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3949     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3950   }
3951 
3952   // Emit initial values for private copies (if any).
3953   assert(!Privates.empty());
3954   Address KmpTaskSharedsPtr = Address::invalid();
3955   if (!Data.FirstprivateVars.empty()) {
3956     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3957         CGF.GetAddrOfLocalVar(&SrcArg),
3958         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3959     LValue Base = CGF.EmitLValueForField(
3960         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3961     KmpTaskSharedsPtr = Address(
3962         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3963                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3964                                                   KmpTaskTShareds)),
3965                              Loc),
3966         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3967   }
3968   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3969                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3970   CGF.FinishFunction();
3971   return TaskDup;
3972 }
3973 
3974 /// Checks if destructor function is required to be generated.
3975 /// \return true if cleanups are required, false otherwise.
3976 static bool
3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3978                          ArrayRef<PrivateDataTy> Privates) {
3979   for (const PrivateDataTy &P : Privates) {
3980     if (P.second.isLocalPrivate())
3981       continue;
3982     QualType Ty = P.second.Original->getType().getNonReferenceType();
3983     if (Ty.isDestructedType())
3984       return true;
3985   }
3986   return false;
3987 }
3988 
3989 namespace {
3990 /// Loop generator for OpenMP iterator expression.
3991 class OMPIteratorGeneratorScope final
3992     : public CodeGenFunction::OMPPrivateScope {
3993   CodeGenFunction &CGF;
3994   const OMPIteratorExpr *E = nullptr;
3995   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3996   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3997   OMPIteratorGeneratorScope() = delete;
3998   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3999 
4000 public:
4001   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4002       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4003     if (!E)
4004       return;
4005     SmallVector<llvm::Value *, 4> Uppers;
4006     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4007       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4008       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4009       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
4010       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4011       addPrivate(
4012           HelperData.CounterVD,
4013           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
4014     }
4015     Privatize();
4016 
4017     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4018       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4019       LValue CLVal =
4020           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4021                              HelperData.CounterVD->getType());
4022       // Counter = 0;
4023       CGF.EmitStoreOfScalar(
4024           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4025           CLVal);
4026       CodeGenFunction::JumpDest &ContDest =
4027           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4028       CodeGenFunction::JumpDest &ExitDest =
4029           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4030       // N = <number-of_iterations>;
4031       llvm::Value *N = Uppers[I];
4032       // cont:
4033       // if (Counter < N) goto body; else goto exit;
4034       CGF.EmitBlock(ContDest.getBlock());
4035       auto *CVal =
4036           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4037       llvm::Value *Cmp =
4038           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4039               ? CGF.Builder.CreateICmpSLT(CVal, N)
4040               : CGF.Builder.CreateICmpULT(CVal, N);
4041       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4042       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4043       // body:
4044       CGF.EmitBlock(BodyBB);
4045       // Iteri = Begini + Counter * Stepi;
4046       CGF.EmitIgnoredExpr(HelperData.Update);
4047     }
4048   }
4049   ~OMPIteratorGeneratorScope() {
4050     if (!E)
4051       return;
4052     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4053       // Counter = Counter + 1;
4054       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4055       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4056       // goto cont;
4057       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4058       // exit:
4059       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4060     }
4061   }
4062 };
4063 } // namespace
4064 
4065 static std::pair<llvm::Value *, llvm::Value *>
4066 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4067   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4068   llvm::Value *Addr;
4069   if (OASE) {
4070     const Expr *Base = OASE->getBase();
4071     Addr = CGF.EmitScalarExpr(Base);
4072   } else {
4073     Addr = CGF.EmitLValue(E).getPointer(CGF);
4074   }
4075   llvm::Value *SizeVal;
4076   QualType Ty = E->getType();
4077   if (OASE) {
4078     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4079     for (const Expr *SE : OASE->getDimensions()) {
4080       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4081       Sz = CGF.EmitScalarConversion(
4082           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4083       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4084     }
4085   } else if (const auto *ASE =
4086                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4087     LValue UpAddrLVal =
4088         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4089     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4090     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4091         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4092     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4093     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4094     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4095   } else {
4096     SizeVal = CGF.getTypeSize(Ty);
4097   }
4098   return std::make_pair(Addr, SizeVal);
4099 }
4100 
4101 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4102 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4103   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4104   if (KmpTaskAffinityInfoTy.isNull()) {
4105     RecordDecl *KmpAffinityInfoRD =
4106         C.buildImplicitRecord("kmp_task_affinity_info_t");
4107     KmpAffinityInfoRD->startDefinition();
4108     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4109     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4110     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4111     KmpAffinityInfoRD->completeDefinition();
4112     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4113   }
4114 }
4115 
4116 CGOpenMPRuntime::TaskResultTy
4117 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4118                               const OMPExecutableDirective &D,
4119                               llvm::Function *TaskFunction, QualType SharedsTy,
4120                               Address Shareds, const OMPTaskDataTy &Data) {
4121   ASTContext &C = CGM.getContext();
4122   llvm::SmallVector<PrivateDataTy, 4> Privates;
4123   // Aggregate privates and sort them by the alignment.
4124   const auto *I = Data.PrivateCopies.begin();
4125   for (const Expr *E : Data.PrivateVars) {
4126     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4127     Privates.emplace_back(
4128         C.getDeclAlign(VD),
4129         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4130                          /*PrivateElemInit=*/nullptr));
4131     ++I;
4132   }
4133   I = Data.FirstprivateCopies.begin();
4134   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4135   for (const Expr *E : Data.FirstprivateVars) {
4136     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4137     Privates.emplace_back(
4138         C.getDeclAlign(VD),
4139         PrivateHelpersTy(
4140             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4141             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4142     ++I;
4143     ++IElemInitRef;
4144   }
4145   I = Data.LastprivateCopies.begin();
4146   for (const Expr *E : Data.LastprivateVars) {
4147     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4148     Privates.emplace_back(
4149         C.getDeclAlign(VD),
4150         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4151                          /*PrivateElemInit=*/nullptr));
4152     ++I;
4153   }
4154   for (const VarDecl *VD : Data.PrivateLocals) {
4155     if (isAllocatableDecl(VD))
4156       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4157     else
4158       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4159   }
4160   llvm::stable_sort(Privates,
4161                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4162                       return L.first > R.first;
4163                     });
4164   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4165   // Build type kmp_routine_entry_t (if not built yet).
4166   emitKmpRoutineEntryT(KmpInt32Ty);
4167   // Build type kmp_task_t (if not built yet).
4168   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4169     if (SavedKmpTaskloopTQTy.isNull()) {
4170       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4171           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4172     }
4173     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4174   } else {
4175     assert((D.getDirectiveKind() == OMPD_task ||
4176             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4177             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4178            "Expected taskloop, task or target directive");
4179     if (SavedKmpTaskTQTy.isNull()) {
4180       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4181           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4182     }
4183     KmpTaskTQTy = SavedKmpTaskTQTy;
4184   }
4185   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4186   // Build particular struct kmp_task_t for the given task.
4187   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4188       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4189   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4190   QualType KmpTaskTWithPrivatesPtrQTy =
4191       C.getPointerType(KmpTaskTWithPrivatesQTy);
4192   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4193   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4194       KmpTaskTWithPrivatesTy->getPointerTo();
4195   llvm::Value *KmpTaskTWithPrivatesTySize =
4196       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4197   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4198 
4199   // Emit initial values for private copies (if any).
4200   llvm::Value *TaskPrivatesMap = nullptr;
4201   llvm::Type *TaskPrivatesMapTy =
4202       std::next(TaskFunction->arg_begin(), 3)->getType();
4203   if (!Privates.empty()) {
4204     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4205     TaskPrivatesMap =
4206         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4207     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4208         TaskPrivatesMap, TaskPrivatesMapTy);
4209   } else {
4210     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4211         cast<llvm::PointerType>(TaskPrivatesMapTy));
4212   }
4213   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4214   // kmp_task_t *tt);
4215   llvm::Function *TaskEntry = emitProxyTaskFunction(
4216       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4217       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4218       TaskPrivatesMap);
4219 
4220   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4221   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4222   // kmp_routine_entry_t *task_entry);
4223   // Task flags. Format is taken from
4224   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4225   // description of kmp_tasking_flags struct.
4226   enum {
4227     TiedFlag = 0x1,
4228     FinalFlag = 0x2,
4229     DestructorsFlag = 0x8,
4230     PriorityFlag = 0x20,
4231     DetachableFlag = 0x40,
4232   };
4233   unsigned Flags = Data.Tied ? TiedFlag : 0;
4234   bool NeedsCleanup = false;
4235   if (!Privates.empty()) {
4236     NeedsCleanup =
4237         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4238     if (NeedsCleanup)
4239       Flags = Flags | DestructorsFlag;
4240   }
4241   if (Data.Priority.getInt())
4242     Flags = Flags | PriorityFlag;
4243   if (D.hasClausesOfKind<OMPDetachClause>())
4244     Flags = Flags | DetachableFlag;
4245   llvm::Value *TaskFlags =
4246       Data.Final.getPointer()
4247           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4248                                      CGF.Builder.getInt32(FinalFlag),
4249                                      CGF.Builder.getInt32(/*C=*/0))
4250           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4251   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4252   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4253   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4254       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4255       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4256           TaskEntry, KmpRoutineEntryPtrTy)};
4257   llvm::Value *NewTask;
4258   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4259     // Check if we have any device clause associated with the directive.
4260     const Expr *Device = nullptr;
4261     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4262       Device = C->getDevice();
4263     // Emit device ID if any otherwise use default value.
4264     llvm::Value *DeviceID;
4265     if (Device)
4266       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4267                                            CGF.Int64Ty, /*isSigned=*/true);
4268     else
4269       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4270     AllocArgs.push_back(DeviceID);
4271     NewTask = CGF.EmitRuntimeCall(
4272         OMPBuilder.getOrCreateRuntimeFunction(
4273             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4274         AllocArgs);
4275   } else {
4276     NewTask =
4277         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4278                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4279                             AllocArgs);
4280   }
4281   // Emit detach clause initialization.
4282   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4283   // task_descriptor);
4284   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4285     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4286     LValue EvtLVal = CGF.EmitLValue(Evt);
4287 
4288     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4289     // int gtid, kmp_task_t *task);
4290     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4291     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4292     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4293     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4294         OMPBuilder.getOrCreateRuntimeFunction(
4295             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4296         {Loc, Tid, NewTask});
4297     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4298                                       Evt->getExprLoc());
4299     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4300   }
4301   // Process affinity clauses.
4302   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4303     // Process list of affinity data.
4304     ASTContext &C = CGM.getContext();
4305     Address AffinitiesArray = Address::invalid();
4306     // Calculate number of elements to form the array of affinity data.
4307     llvm::Value *NumOfElements = nullptr;
4308     unsigned NumAffinities = 0;
4309     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4310       if (const Expr *Modifier = C->getModifier()) {
4311         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4312         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4313           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4314           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4315           NumOfElements =
4316               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4317         }
4318       } else {
4319         NumAffinities += C->varlist_size();
4320       }
4321     }
4322     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4323     // Fields ids in kmp_task_affinity_info record.
4324     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4325 
4326     QualType KmpTaskAffinityInfoArrayTy;
4327     if (NumOfElements) {
4328       NumOfElements = CGF.Builder.CreateNUWAdd(
4329           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4330       auto *OVE = new (C) OpaqueValueExpr(
4331           Loc,
4332           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4333           VK_PRValue);
4334       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4335                                                     RValue::get(NumOfElements));
4336       KmpTaskAffinityInfoArrayTy =
4337           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4338                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4339       // Properly emit variable-sized array.
4340       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4341                                            ImplicitParamDecl::Other);
4342       CGF.EmitVarDecl(*PD);
4343       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4344       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4345                                                 /*isSigned=*/false);
4346     } else {
4347       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4348           KmpTaskAffinityInfoTy,
4349           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4350           ArrayType::Normal, /*IndexTypeQuals=*/0);
4351       AffinitiesArray =
4352           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4353       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4354       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4355                                              /*isSigned=*/false);
4356     }
4357 
4358     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4359     // Fill array by elements without iterators.
4360     unsigned Pos = 0;
4361     bool HasIterator = false;
4362     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4363       if (C->getModifier()) {
4364         HasIterator = true;
4365         continue;
4366       }
4367       for (const Expr *E : C->varlists()) {
4368         llvm::Value *Addr;
4369         llvm::Value *Size;
4370         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4371         LValue Base =
4372             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4373                                KmpTaskAffinityInfoTy);
4374         // affs[i].base_addr = &<Affinities[i].second>;
4375         LValue BaseAddrLVal = CGF.EmitLValueForField(
4376             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4377         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4378                               BaseAddrLVal);
4379         // affs[i].len = sizeof(<Affinities[i].second>);
4380         LValue LenLVal = CGF.EmitLValueForField(
4381             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4382         CGF.EmitStoreOfScalar(Size, LenLVal);
4383         ++Pos;
4384       }
4385     }
4386     LValue PosLVal;
4387     if (HasIterator) {
4388       PosLVal = CGF.MakeAddrLValue(
4389           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4390           C.getSizeType());
4391       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4392     }
4393     // Process elements with iterators.
4394     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4395       const Expr *Modifier = C->getModifier();
4396       if (!Modifier)
4397         continue;
4398       OMPIteratorGeneratorScope IteratorScope(
4399           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4400       for (const Expr *E : C->varlists()) {
4401         llvm::Value *Addr;
4402         llvm::Value *Size;
4403         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4404         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4405         LValue Base = CGF.MakeAddrLValue(
4406             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4407         // affs[i].base_addr = &<Affinities[i].second>;
4408         LValue BaseAddrLVal = CGF.EmitLValueForField(
4409             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4410         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4411                               BaseAddrLVal);
4412         // affs[i].len = sizeof(<Affinities[i].second>);
4413         LValue LenLVal = CGF.EmitLValueForField(
4414             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4415         CGF.EmitStoreOfScalar(Size, LenLVal);
4416         Idx = CGF.Builder.CreateNUWAdd(
4417             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4418         CGF.EmitStoreOfScalar(Idx, PosLVal);
4419       }
4420     }
4421     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4422     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4423     // naffins, kmp_task_affinity_info_t *affin_list);
4424     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4425     llvm::Value *GTid = getThreadID(CGF, Loc);
4426     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4427         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4428     // FIXME: Emit the function and ignore its result for now unless the
4429     // runtime function is properly implemented.
4430     (void)CGF.EmitRuntimeCall(
4431         OMPBuilder.getOrCreateRuntimeFunction(
4432             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4433         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4434   }
4435   llvm::Value *NewTaskNewTaskTTy =
4436       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4437           NewTask, KmpTaskTWithPrivatesPtrTy);
4438   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4439                                                KmpTaskTWithPrivatesQTy);
4440   LValue TDBase =
4441       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4442   // Fill the data in the resulting kmp_task_t record.
4443   // Copy shareds if there are any.
4444   Address KmpTaskSharedsPtr = Address::invalid();
4445   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4446     KmpTaskSharedsPtr = Address(
4447         CGF.EmitLoadOfScalar(
4448             CGF.EmitLValueForField(
4449                 TDBase,
4450                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4451             Loc),
4452         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4453     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4454     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4455     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4456   }
4457   // Emit initial values for private copies (if any).
4458   TaskResultTy Result;
4459   if (!Privates.empty()) {
4460     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4461                      SharedsTy, SharedsPtrTy, Data, Privates,
4462                      /*ForDup=*/false);
4463     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4464         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4465       Result.TaskDupFn = emitTaskDupFunction(
4466           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4467           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4468           /*WithLastIter=*/!Data.LastprivateVars.empty());
4469     }
4470   }
4471   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4472   enum { Priority = 0, Destructors = 1 };
4473   // Provide pointer to function with destructors for privates.
4474   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4475   const RecordDecl *KmpCmplrdataUD =
4476       (*FI)->getType()->getAsUnionType()->getDecl();
4477   if (NeedsCleanup) {
4478     llvm::Value *DestructorFn = emitDestructorsFunction(
4479         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4480         KmpTaskTWithPrivatesQTy);
4481     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4482     LValue DestructorsLV = CGF.EmitLValueForField(
4483         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4484     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4485                               DestructorFn, KmpRoutineEntryPtrTy),
4486                           DestructorsLV);
4487   }
4488   // Set priority.
4489   if (Data.Priority.getInt()) {
4490     LValue Data2LV = CGF.EmitLValueForField(
4491         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4492     LValue PriorityLV = CGF.EmitLValueForField(
4493         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4494     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4495   }
4496   Result.NewTask = NewTask;
4497   Result.TaskEntry = TaskEntry;
4498   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4499   Result.TDBase = TDBase;
4500   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4501   return Result;
4502 }
4503 
4504 namespace {
4505 /// Dependence kind for RTL.
4506 enum RTLDependenceKindTy {
4507   DepIn = 0x01,
4508   DepInOut = 0x3,
4509   DepMutexInOutSet = 0x4,
4510   DepInOutSet = 0x8,
4511   DepOmpAllMem = 0x80,
4512 };
4513 /// Fields ids in kmp_depend_info record.
4514 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4515 } // namespace
4516 
4517 /// Translates internal dependency kind into the runtime kind.
4518 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4519   RTLDependenceKindTy DepKind;
4520   switch (K) {
4521   case OMPC_DEPEND_in:
4522     DepKind = DepIn;
4523     break;
4524   // Out and InOut dependencies must use the same code.
4525   case OMPC_DEPEND_out:
4526   case OMPC_DEPEND_inout:
4527     DepKind = DepInOut;
4528     break;
4529   case OMPC_DEPEND_mutexinoutset:
4530     DepKind = DepMutexInOutSet;
4531     break;
4532   case OMPC_DEPEND_inoutset:
4533     DepKind = DepInOutSet;
4534     break;
4535   case OMPC_DEPEND_outallmemory:
4536     DepKind = DepOmpAllMem;
4537     break;
4538   case OMPC_DEPEND_source:
4539   case OMPC_DEPEND_sink:
4540   case OMPC_DEPEND_depobj:
4541   case OMPC_DEPEND_inoutallmemory:
4542   case OMPC_DEPEND_unknown:
4543     llvm_unreachable("Unknown task dependence type");
4544   }
4545   return DepKind;
4546 }
4547 
4548 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4549 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4550                            QualType &FlagsTy) {
4551   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4552   if (KmpDependInfoTy.isNull()) {
4553     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4554     KmpDependInfoRD->startDefinition();
4555     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4556     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4557     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4558     KmpDependInfoRD->completeDefinition();
4559     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4560   }
4561 }
4562 
4563 std::pair<llvm::Value *, LValue>
4564 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4565                                    SourceLocation Loc) {
4566   ASTContext &C = CGM.getContext();
4567   QualType FlagsTy;
4568   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4569   RecordDecl *KmpDependInfoRD =
4570       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4571   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4572   LValue Base = CGF.EmitLoadOfPointerLValue(
4573       CGF.Builder.CreateElementBitCast(
4574           DepobjLVal.getAddress(CGF),
4575           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4576       KmpDependInfoPtrTy->castAs<PointerType>());
4577   Address DepObjAddr = CGF.Builder.CreateGEP(
4578       Base.getAddress(CGF),
4579       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4580   LValue NumDepsBase = CGF.MakeAddrLValue(
4581       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4582   // NumDeps = deps[i].base_addr;
4583   LValue BaseAddrLVal = CGF.EmitLValueForField(
4584       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4585   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4586   return std::make_pair(NumDeps, Base);
4587 }
4588 
4589 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4590                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4591                            const OMPTaskDataTy::DependData &Data,
4592                            Address DependenciesArray) {
4593   CodeGenModule &CGM = CGF.CGM;
4594   ASTContext &C = CGM.getContext();
4595   QualType FlagsTy;
4596   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4597   RecordDecl *KmpDependInfoRD =
4598       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4599   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4600 
4601   OMPIteratorGeneratorScope IteratorScope(
4602       CGF, cast_or_null<OMPIteratorExpr>(
4603                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4604                                  : nullptr));
4605   for (const Expr *E : Data.DepExprs) {
4606     llvm::Value *Addr;
4607     llvm::Value *Size;
4608 
4609     // The expression will be a nullptr in the 'omp_all_memory' case.
4610     if (E) {
4611       std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4612       Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4613     } else {
4614       Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4615       Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4616     }
4617     LValue Base;
4618     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4619       Base = CGF.MakeAddrLValue(
4620           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4621     } else {
4622       assert(E && "Expected a non-null expression");
4623       LValue &PosLVal = *Pos.get<LValue *>();
4624       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4625       Base = CGF.MakeAddrLValue(
4626           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4627     }
4628     // deps[i].base_addr = &<Dependencies[i].second>;
4629     LValue BaseAddrLVal = CGF.EmitLValueForField(
4630         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4631     CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4632     // deps[i].len = sizeof(<Dependencies[i].second>);
4633     LValue LenLVal = CGF.EmitLValueForField(
4634         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4635     CGF.EmitStoreOfScalar(Size, LenLVal);
4636     // deps[i].flags = <Dependencies[i].first>;
4637     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4638     LValue FlagsLVal = CGF.EmitLValueForField(
4639         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4640     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4641                           FlagsLVal);
4642     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4643       ++(*P);
4644     } else {
4645       LValue &PosLVal = *Pos.get<LValue *>();
4646       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4647       Idx = CGF.Builder.CreateNUWAdd(Idx,
4648                                      llvm::ConstantInt::get(Idx->getType(), 1));
4649       CGF.EmitStoreOfScalar(Idx, PosLVal);
4650     }
4651   }
4652 }
4653 
4654 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4655     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4656     const OMPTaskDataTy::DependData &Data) {
4657   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4658          "Expected depobj dependecy kind.");
4659   SmallVector<llvm::Value *, 4> Sizes;
4660   SmallVector<LValue, 4> SizeLVals;
4661   ASTContext &C = CGF.getContext();
4662   {
4663     OMPIteratorGeneratorScope IteratorScope(
4664         CGF, cast_or_null<OMPIteratorExpr>(
4665                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4666                                    : nullptr));
4667     for (const Expr *E : Data.DepExprs) {
4668       llvm::Value *NumDeps;
4669       LValue Base;
4670       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4671       std::tie(NumDeps, Base) =
4672           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4673       LValue NumLVal = CGF.MakeAddrLValue(
4674           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4675           C.getUIntPtrType());
4676       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4677                               NumLVal.getAddress(CGF));
4678       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4679       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4680       CGF.EmitStoreOfScalar(Add, NumLVal);
4681       SizeLVals.push_back(NumLVal);
4682     }
4683   }
4684   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4685     llvm::Value *Size =
4686         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4687     Sizes.push_back(Size);
4688   }
4689   return Sizes;
4690 }
4691 
4692 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4693                                          QualType &KmpDependInfoTy,
4694                                          LValue PosLVal,
4695                                          const OMPTaskDataTy::DependData &Data,
4696                                          Address DependenciesArray) {
4697   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4698          "Expected depobj dependecy kind.");
4699   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4700   {
4701     OMPIteratorGeneratorScope IteratorScope(
4702         CGF, cast_or_null<OMPIteratorExpr>(
4703                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4704                                    : nullptr));
4705     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4706       const Expr *E = Data.DepExprs[I];
4707       llvm::Value *NumDeps;
4708       LValue Base;
4709       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4710       std::tie(NumDeps, Base) =
4711           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4712 
4713       // memcopy dependency data.
4714       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4715           ElSize,
4716           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4717       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4718       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4719       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4720 
4721       // Increase pos.
4722       // pos += size;
4723       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4724       CGF.EmitStoreOfScalar(Add, PosLVal);
4725     }
4726   }
4727 }
4728 
4729 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4730     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4731     SourceLocation Loc) {
4732   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4733         return D.DepExprs.empty();
4734       }))
4735     return std::make_pair(nullptr, Address::invalid());
4736   // Process list of dependencies.
4737   ASTContext &C = CGM.getContext();
4738   Address DependenciesArray = Address::invalid();
4739   llvm::Value *NumOfElements = nullptr;
4740   unsigned NumDependencies = std::accumulate(
4741       Dependencies.begin(), Dependencies.end(), 0,
4742       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4743         return D.DepKind == OMPC_DEPEND_depobj
4744                    ? V
4745                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4746       });
4747   QualType FlagsTy;
4748   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4749   bool HasDepobjDeps = false;
4750   bool HasRegularWithIterators = false;
4751   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4752   llvm::Value *NumOfRegularWithIterators =
4753       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4754   // Calculate number of depobj dependecies and regular deps with the iterators.
4755   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4756     if (D.DepKind == OMPC_DEPEND_depobj) {
4757       SmallVector<llvm::Value *, 4> Sizes =
4758           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4759       for (llvm::Value *Size : Sizes) {
4760         NumOfDepobjElements =
4761             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4762       }
4763       HasDepobjDeps = true;
4764       continue;
4765     }
4766     // Include number of iterations, if any.
4767 
4768     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4769       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4770         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4771         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4772         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4773             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4774         NumOfRegularWithIterators =
4775             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4776       }
4777       HasRegularWithIterators = true;
4778       continue;
4779     }
4780   }
4781 
4782   QualType KmpDependInfoArrayTy;
4783   if (HasDepobjDeps || HasRegularWithIterators) {
4784     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4785                                            /*isSigned=*/false);
4786     if (HasDepobjDeps) {
4787       NumOfElements =
4788           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4789     }
4790     if (HasRegularWithIterators) {
4791       NumOfElements =
4792           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4793     }
4794     auto *OVE = new (C) OpaqueValueExpr(
4795         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4796         VK_PRValue);
4797     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4798                                                   RValue::get(NumOfElements));
4799     KmpDependInfoArrayTy =
4800         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4801                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4802     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4803     // Properly emit variable-sized array.
4804     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4805                                          ImplicitParamDecl::Other);
4806     CGF.EmitVarDecl(*PD);
4807     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4808     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4809                                               /*isSigned=*/false);
4810   } else {
4811     KmpDependInfoArrayTy = C.getConstantArrayType(
4812         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4813         ArrayType::Normal, /*IndexTypeQuals=*/0);
4814     DependenciesArray =
4815         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4816     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4817     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4818                                            /*isSigned=*/false);
4819   }
4820   unsigned Pos = 0;
4821   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4822     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4823         Dependencies[I].IteratorExpr)
4824       continue;
4825     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4826                    DependenciesArray);
4827   }
4828   // Copy regular dependecies with iterators.
4829   LValue PosLVal = CGF.MakeAddrLValue(
4830       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4831   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4832   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4833     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4834         !Dependencies[I].IteratorExpr)
4835       continue;
4836     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4837                    DependenciesArray);
4838   }
4839   // Copy final depobj arrays without iterators.
4840   if (HasDepobjDeps) {
4841     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4842       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4843         continue;
4844       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4845                          DependenciesArray);
4846     }
4847   }
4848   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4849       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4850   return std::make_pair(NumOfElements, DependenciesArray);
4851 }
4852 
4853 Address CGOpenMPRuntime::emitDepobjDependClause(
4854     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4855     SourceLocation Loc) {
4856   if (Dependencies.DepExprs.empty())
4857     return Address::invalid();
4858   // Process list of dependencies.
4859   ASTContext &C = CGM.getContext();
4860   Address DependenciesArray = Address::invalid();
4861   unsigned NumDependencies = Dependencies.DepExprs.size();
4862   QualType FlagsTy;
4863   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4864   RecordDecl *KmpDependInfoRD =
4865       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4866 
4867   llvm::Value *Size;
4868   // Define type kmp_depend_info[<Dependencies.size()>];
4869   // For depobj reserve one extra element to store the number of elements.
4870   // It is required to handle depobj(x) update(in) construct.
4871   // kmp_depend_info[<Dependencies.size()>] deps;
4872   llvm::Value *NumDepsVal;
4873   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4874   if (const auto *IE =
4875           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4876     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4877     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4878       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4879       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4880       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4881     }
4882     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4883                                     NumDepsVal);
4884     CharUnits SizeInBytes =
4885         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4886     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4887     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4888     NumDepsVal =
4889         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4890   } else {
4891     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4892         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4893         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4894     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4895     Size = CGM.getSize(Sz.alignTo(Align));
4896     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4897   }
4898   // Need to allocate on the dynamic memory.
4899   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4900   // Use default allocator.
4901   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4902   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4903 
4904   llvm::Value *Addr =
4905       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4906                               CGM.getModule(), OMPRTL___kmpc_alloc),
4907                           Args, ".dep.arr.addr");
4908   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4909   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4910       Addr, KmpDependInfoLlvmTy->getPointerTo());
4911   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4912   // Write number of elements in the first element of array for depobj.
4913   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4914   // deps[i].base_addr = NumDependencies;
4915   LValue BaseAddrLVal = CGF.EmitLValueForField(
4916       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4917   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4918   llvm::PointerUnion<unsigned *, LValue *> Pos;
4919   unsigned Idx = 1;
4920   LValue PosLVal;
4921   if (Dependencies.IteratorExpr) {
4922     PosLVal = CGF.MakeAddrLValue(
4923         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4924         C.getSizeType());
4925     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4926                           /*IsInit=*/true);
4927     Pos = &PosLVal;
4928   } else {
4929     Pos = &Idx;
4930   }
4931   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4932   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4933       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4934       CGF.Int8Ty);
4935   return DependenciesArray;
4936 }
4937 
4938 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4939                                         SourceLocation Loc) {
4940   ASTContext &C = CGM.getContext();
4941   QualType FlagsTy;
4942   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4943   LValue Base = CGF.EmitLoadOfPointerLValue(
4944       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4945   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4946   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4947       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4948       CGF.ConvertTypeForMem(KmpDependInfoTy));
4949   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4950       Addr.getElementType(), Addr.getPointer(),
4951       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4952   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4953                                                                CGF.VoidPtrTy);
4954   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4955   // Use default allocator.
4956   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4957   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4958 
4959   // _kmpc_free(gtid, addr, nullptr);
4960   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4961                                 CGM.getModule(), OMPRTL___kmpc_free),
4962                             Args);
4963 }
4964 
4965 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4966                                        OpenMPDependClauseKind NewDepKind,
4967                                        SourceLocation Loc) {
4968   ASTContext &C = CGM.getContext();
4969   QualType FlagsTy;
4970   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4971   RecordDecl *KmpDependInfoRD =
4972       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4973   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4974   llvm::Value *NumDeps;
4975   LValue Base;
4976   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4977 
4978   Address Begin = Base.getAddress(CGF);
4979   // Cast from pointer to array type to pointer to single element.
4980   llvm::Value *End = CGF.Builder.CreateGEP(
4981       Begin.getElementType(), Begin.getPointer(), NumDeps);
4982   // The basic structure here is a while-do loop.
4983   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4984   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4985   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4986   CGF.EmitBlock(BodyBB);
4987   llvm::PHINode *ElementPHI =
4988       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4989   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4990   Begin = Begin.withPointer(ElementPHI);
4991   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4992                             Base.getTBAAInfo());
4993   // deps[i].flags = NewDepKind;
4994   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4995   LValue FlagsLVal = CGF.EmitLValueForField(
4996       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4997   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4998                         FlagsLVal);
4999 
5000   // Shift the address forward by one element.
5001   Address ElementNext =
5002       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5003   ElementPHI->addIncoming(ElementNext.getPointer(),
5004                           CGF.Builder.GetInsertBlock());
5005   llvm::Value *IsEmpty =
5006       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5007   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5008   // Done.
5009   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5010 }
5011 
5012 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5013                                    const OMPExecutableDirective &D,
5014                                    llvm::Function *TaskFunction,
5015                                    QualType SharedsTy, Address Shareds,
5016                                    const Expr *IfCond,
5017                                    const OMPTaskDataTy &Data) {
5018   if (!CGF.HaveInsertPoint())
5019     return;
5020 
5021   TaskResultTy Result =
5022       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5023   llvm::Value *NewTask = Result.NewTask;
5024   llvm::Function *TaskEntry = Result.TaskEntry;
5025   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5026   LValue TDBase = Result.TDBase;
5027   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5028   // Process list of dependences.
5029   Address DependenciesArray = Address::invalid();
5030   llvm::Value *NumOfElements;
5031   std::tie(NumOfElements, DependenciesArray) =
5032       emitDependClause(CGF, Data.Dependences, Loc);
5033 
5034   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5035   // libcall.
5036   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5037   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5038   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5039   // list is not empty
5040   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5041   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5042   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5043   llvm::Value *DepTaskArgs[7];
5044   if (!Data.Dependences.empty()) {
5045     DepTaskArgs[0] = UpLoc;
5046     DepTaskArgs[1] = ThreadID;
5047     DepTaskArgs[2] = NewTask;
5048     DepTaskArgs[3] = NumOfElements;
5049     DepTaskArgs[4] = DependenciesArray.getPointer();
5050     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5051     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5052   }
5053   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5054                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5055     if (!Data.Tied) {
5056       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5057       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5058       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5059     }
5060     if (!Data.Dependences.empty()) {
5061       CGF.EmitRuntimeCall(
5062           OMPBuilder.getOrCreateRuntimeFunction(
5063               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5064           DepTaskArgs);
5065     } else {
5066       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5067                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5068                           TaskArgs);
5069     }
5070     // Check if parent region is untied and build return for untied task;
5071     if (auto *Region =
5072             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5073       Region->emitUntiedSwitch(CGF);
5074   };
5075 
5076   llvm::Value *DepWaitTaskArgs[6];
5077   if (!Data.Dependences.empty()) {
5078     DepWaitTaskArgs[0] = UpLoc;
5079     DepWaitTaskArgs[1] = ThreadID;
5080     DepWaitTaskArgs[2] = NumOfElements;
5081     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5082     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5083     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5084   }
5085   auto &M = CGM.getModule();
5086   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5087                         TaskEntry, &Data, &DepWaitTaskArgs,
5088                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5089     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5090     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5091     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5092     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5093     // is specified.
5094     if (!Data.Dependences.empty())
5095       CGF.EmitRuntimeCall(
5096           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5097           DepWaitTaskArgs);
5098     // Call proxy_task_entry(gtid, new_task);
5099     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5100                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5101       Action.Enter(CGF);
5102       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5103       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5104                                                           OutlinedFnArgs);
5105     };
5106 
5107     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5108     // kmp_task_t *new_task);
5109     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5110     // kmp_task_t *new_task);
5111     RegionCodeGenTy RCG(CodeGen);
5112     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5113                               M, OMPRTL___kmpc_omp_task_begin_if0),
5114                           TaskArgs,
5115                           OMPBuilder.getOrCreateRuntimeFunction(
5116                               M, OMPRTL___kmpc_omp_task_complete_if0),
5117                           TaskArgs);
5118     RCG.setAction(Action);
5119     RCG(CGF);
5120   };
5121 
5122   if (IfCond) {
5123     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5124   } else {
5125     RegionCodeGenTy ThenRCG(ThenCodeGen);
5126     ThenRCG(CGF);
5127   }
5128 }
5129 
5130 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5131                                        const OMPLoopDirective &D,
5132                                        llvm::Function *TaskFunction,
5133                                        QualType SharedsTy, Address Shareds,
5134                                        const Expr *IfCond,
5135                                        const OMPTaskDataTy &Data) {
5136   if (!CGF.HaveInsertPoint())
5137     return;
5138   TaskResultTy Result =
5139       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5140   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5141   // libcall.
5142   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5143   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5144   // sched, kmp_uint64 grainsize, void *task_dup);
5145   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5146   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5147   llvm::Value *IfVal;
5148   if (IfCond) {
5149     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5150                                       /*isSigned=*/true);
5151   } else {
5152     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5153   }
5154 
5155   LValue LBLVal = CGF.EmitLValueForField(
5156       Result.TDBase,
5157       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5158   const auto *LBVar =
5159       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5160   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5161                        LBLVal.getQuals(),
5162                        /*IsInitializer=*/true);
5163   LValue UBLVal = CGF.EmitLValueForField(
5164       Result.TDBase,
5165       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5166   const auto *UBVar =
5167       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5168   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5169                        UBLVal.getQuals(),
5170                        /*IsInitializer=*/true);
5171   LValue StLVal = CGF.EmitLValueForField(
5172       Result.TDBase,
5173       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5174   const auto *StVar =
5175       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5176   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5177                        StLVal.getQuals(),
5178                        /*IsInitializer=*/true);
5179   // Store reductions address.
5180   LValue RedLVal = CGF.EmitLValueForField(
5181       Result.TDBase,
5182       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5183   if (Data.Reductions) {
5184     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5185   } else {
5186     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5187                                CGF.getContext().VoidPtrTy);
5188   }
5189   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5190   llvm::Value *TaskArgs[] = {
5191       UpLoc,
5192       ThreadID,
5193       Result.NewTask,
5194       IfVal,
5195       LBLVal.getPointer(CGF),
5196       UBLVal.getPointer(CGF),
5197       CGF.EmitLoadOfScalar(StLVal, Loc),
5198       llvm::ConstantInt::getSigned(
5199           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5200       llvm::ConstantInt::getSigned(
5201           CGF.IntTy, Data.Schedule.getPointer()
5202                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5203                          : NoSchedule),
5204       Data.Schedule.getPointer()
5205           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5206                                       /*isSigned=*/false)
5207           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5208       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5209                              Result.TaskDupFn, CGF.VoidPtrTy)
5210                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5211   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5212                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5213                       TaskArgs);
5214 }
5215 
5216 /// Emit reduction operation for each element of array (required for
5217 /// array sections) LHS op = RHS.
5218 /// \param Type Type of array.
5219 /// \param LHSVar Variable on the left side of the reduction operation
5220 /// (references element of array in original variable).
5221 /// \param RHSVar Variable on the right side of the reduction operation
5222 /// (references element of array in original variable).
5223 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5224 /// RHSVar.
5225 static void EmitOMPAggregateReduction(
5226     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5227     const VarDecl *RHSVar,
5228     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5229                                   const Expr *, const Expr *)> &RedOpGen,
5230     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5231     const Expr *UpExpr = nullptr) {
5232   // Perform element-by-element initialization.
5233   QualType ElementTy;
5234   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5235   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5236 
5237   // Drill down to the base element type on both arrays.
5238   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5239   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5240 
5241   llvm::Value *RHSBegin = RHSAddr.getPointer();
5242   llvm::Value *LHSBegin = LHSAddr.getPointer();
5243   // Cast from pointer to array type to pointer to single element.
5244   llvm::Value *LHSEnd =
5245       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5246   // The basic structure here is a while-do loop.
5247   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5248   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5249   llvm::Value *IsEmpty =
5250       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5251   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5252 
5253   // Enter the loop body, making that address the current address.
5254   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5255   CGF.EmitBlock(BodyBB);
5256 
5257   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5258 
5259   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5260       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5261   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5262   Address RHSElementCurrent(
5263       RHSElementPHI, RHSAddr.getElementType(),
5264       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5265 
5266   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5267       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5268   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5269   Address LHSElementCurrent(
5270       LHSElementPHI, LHSAddr.getElementType(),
5271       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5272 
5273   // Emit copy.
5274   CodeGenFunction::OMPPrivateScope Scope(CGF);
5275   Scope.addPrivate(LHSVar, LHSElementCurrent);
5276   Scope.addPrivate(RHSVar, RHSElementCurrent);
5277   Scope.Privatize();
5278   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5279   Scope.ForceCleanup();
5280 
5281   // Shift the address forward by one element.
5282   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5283       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5284       "omp.arraycpy.dest.element");
5285   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5286       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5287       "omp.arraycpy.src.element");
5288   // Check whether we've reached the end.
5289   llvm::Value *Done =
5290       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5291   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5292   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5293   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5294 
5295   // Done.
5296   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5297 }
5298 
5299 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5300 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5301 /// UDR combiner function.
5302 static void emitReductionCombiner(CodeGenFunction &CGF,
5303                                   const Expr *ReductionOp) {
5304   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5305     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5306       if (const auto *DRE =
5307               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5308         if (const auto *DRD =
5309                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5310           std::pair<llvm::Function *, llvm::Function *> Reduction =
5311               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5312           RValue Func = RValue::get(Reduction.first);
5313           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5314           CGF.EmitIgnoredExpr(ReductionOp);
5315           return;
5316         }
5317   CGF.EmitIgnoredExpr(ReductionOp);
5318 }
5319 
5320 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5321     SourceLocation Loc, llvm::Type *ArgsElemType,
5322     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5323     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5324   ASTContext &C = CGM.getContext();
5325 
5326   // void reduction_func(void *LHSArg, void *RHSArg);
5327   FunctionArgList Args;
5328   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5329                            ImplicitParamDecl::Other);
5330   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5331                            ImplicitParamDecl::Other);
5332   Args.push_back(&LHSArg);
5333   Args.push_back(&RHSArg);
5334   const auto &CGFI =
5335       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5336   std::string Name = getName({"omp", "reduction", "reduction_func"});
5337   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5338                                     llvm::GlobalValue::InternalLinkage, Name,
5339                                     &CGM.getModule());
5340   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5341   Fn->setDoesNotRecurse();
5342   CodeGenFunction CGF(CGM);
5343   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5344 
5345   // Dst = (void*[n])(LHSArg);
5346   // Src = (void*[n])(RHSArg);
5347   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5348                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5349                   ArgsElemType->getPointerTo()),
5350               ArgsElemType, CGF.getPointerAlign());
5351   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5352                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5353                   ArgsElemType->getPointerTo()),
5354               ArgsElemType, CGF.getPointerAlign());
5355 
5356   //  ...
5357   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5358   //  ...
5359   CodeGenFunction::OMPPrivateScope Scope(CGF);
5360   const auto *IPriv = Privates.begin();
5361   unsigned Idx = 0;
5362   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5363     const auto *RHSVar =
5364         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5365     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5366     const auto *LHSVar =
5367         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5368     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5369     QualType PrivTy = (*IPriv)->getType();
5370     if (PrivTy->isVariablyModifiedType()) {
5371       // Get array size and emit VLA type.
5372       ++Idx;
5373       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5374       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5375       const VariableArrayType *VLA =
5376           CGF.getContext().getAsVariableArrayType(PrivTy);
5377       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5378       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5379           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5380       CGF.EmitVariablyModifiedType(PrivTy);
5381     }
5382   }
5383   Scope.Privatize();
5384   IPriv = Privates.begin();
5385   const auto *ILHS = LHSExprs.begin();
5386   const auto *IRHS = RHSExprs.begin();
5387   for (const Expr *E : ReductionOps) {
5388     if ((*IPriv)->getType()->isArrayType()) {
5389       // Emit reduction for array section.
5390       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5391       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5392       EmitOMPAggregateReduction(
5393           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5394           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5395             emitReductionCombiner(CGF, E);
5396           });
5397     } else {
5398       // Emit reduction for array subscript or single variable.
5399       emitReductionCombiner(CGF, E);
5400     }
5401     ++IPriv;
5402     ++ILHS;
5403     ++IRHS;
5404   }
5405   Scope.ForceCleanup();
5406   CGF.FinishFunction();
5407   return Fn;
5408 }
5409 
5410 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5411                                                   const Expr *ReductionOp,
5412                                                   const Expr *PrivateRef,
5413                                                   const DeclRefExpr *LHS,
5414                                                   const DeclRefExpr *RHS) {
5415   if (PrivateRef->getType()->isArrayType()) {
5416     // Emit reduction for array section.
5417     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5418     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5419     EmitOMPAggregateReduction(
5420         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5421         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5422           emitReductionCombiner(CGF, ReductionOp);
5423         });
5424   } else {
5425     // Emit reduction for array subscript or single variable.
5426     emitReductionCombiner(CGF, ReductionOp);
5427   }
5428 }
5429 
5430 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5431                                     ArrayRef<const Expr *> Privates,
5432                                     ArrayRef<const Expr *> LHSExprs,
5433                                     ArrayRef<const Expr *> RHSExprs,
5434                                     ArrayRef<const Expr *> ReductionOps,
5435                                     ReductionOptionsTy Options) {
5436   if (!CGF.HaveInsertPoint())
5437     return;
5438 
5439   bool WithNowait = Options.WithNowait;
5440   bool SimpleReduction = Options.SimpleReduction;
5441 
5442   // Next code should be emitted for reduction:
5443   //
5444   // static kmp_critical_name lock = { 0 };
5445   //
5446   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5447   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5448   //  ...
5449   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5450   //  *(Type<n>-1*)rhs[<n>-1]);
5451   // }
5452   //
5453   // ...
5454   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5455   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5456   // RedList, reduce_func, &<lock>)) {
5457   // case 1:
5458   //  ...
5459   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5460   //  ...
5461   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5462   // break;
5463   // case 2:
5464   //  ...
5465   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5466   //  ...
5467   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5468   // break;
5469   // default:;
5470   // }
5471   //
5472   // if SimpleReduction is true, only the next code is generated:
5473   //  ...
5474   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5475   //  ...
5476 
5477   ASTContext &C = CGM.getContext();
5478 
5479   if (SimpleReduction) {
5480     CodeGenFunction::RunCleanupsScope Scope(CGF);
5481     const auto *IPriv = Privates.begin();
5482     const auto *ILHS = LHSExprs.begin();
5483     const auto *IRHS = RHSExprs.begin();
5484     for (const Expr *E : ReductionOps) {
5485       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5486                                   cast<DeclRefExpr>(*IRHS));
5487       ++IPriv;
5488       ++ILHS;
5489       ++IRHS;
5490     }
5491     return;
5492   }
5493 
5494   // 1. Build a list of reduction variables.
5495   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5496   auto Size = RHSExprs.size();
5497   for (const Expr *E : Privates) {
5498     if (E->getType()->isVariablyModifiedType())
5499       // Reserve place for array size.
5500       ++Size;
5501   }
5502   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5503   QualType ReductionArrayTy =
5504       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5505                              /*IndexTypeQuals=*/0);
5506   Address ReductionList =
5507       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5508   const auto *IPriv = Privates.begin();
5509   unsigned Idx = 0;
5510   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5511     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5512     CGF.Builder.CreateStore(
5513         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5514             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5515         Elem);
5516     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5517       // Store array size.
5518       ++Idx;
5519       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5520       llvm::Value *Size = CGF.Builder.CreateIntCast(
5521           CGF.getVLASize(
5522                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5523               .NumElts,
5524           CGF.SizeTy, /*isSigned=*/false);
5525       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5526                               Elem);
5527     }
5528   }
5529 
5530   // 2. Emit reduce_func().
5531   llvm::Function *ReductionFn =
5532       emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5533                             Privates, LHSExprs, RHSExprs, ReductionOps);
5534 
5535   // 3. Create static kmp_critical_name lock = { 0 };
5536   std::string Name = getName({"reduction"});
5537   llvm::Value *Lock = getCriticalRegionLock(Name);
5538 
5539   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5540   // RedList, reduce_func, &<lock>);
5541   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5542   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5543   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5544   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5545       ReductionList.getPointer(), CGF.VoidPtrTy);
5546   llvm::Value *Args[] = {
5547       IdentTLoc,                             // ident_t *<loc>
5548       ThreadId,                              // i32 <gtid>
5549       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5550       ReductionArrayTySize,                  // size_type sizeof(RedList)
5551       RL,                                    // void *RedList
5552       ReductionFn, // void (*) (void *, void *) <reduce_func>
5553       Lock         // kmp_critical_name *&<lock>
5554   };
5555   llvm::Value *Res = CGF.EmitRuntimeCall(
5556       OMPBuilder.getOrCreateRuntimeFunction(
5557           CGM.getModule(),
5558           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5559       Args);
5560 
5561   // 5. Build switch(res)
5562   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5563   llvm::SwitchInst *SwInst =
5564       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5565 
5566   // 6. Build case 1:
5567   //  ...
5568   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5569   //  ...
5570   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5571   // break;
5572   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5573   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5574   CGF.EmitBlock(Case1BB);
5575 
5576   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5577   llvm::Value *EndArgs[] = {
5578       IdentTLoc, // ident_t *<loc>
5579       ThreadId,  // i32 <gtid>
5580       Lock       // kmp_critical_name *&<lock>
5581   };
5582   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5583                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5584     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5585     const auto *IPriv = Privates.begin();
5586     const auto *ILHS = LHSExprs.begin();
5587     const auto *IRHS = RHSExprs.begin();
5588     for (const Expr *E : ReductionOps) {
5589       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5590                                      cast<DeclRefExpr>(*IRHS));
5591       ++IPriv;
5592       ++ILHS;
5593       ++IRHS;
5594     }
5595   };
5596   RegionCodeGenTy RCG(CodeGen);
5597   CommonActionTy Action(
5598       nullptr, llvm::None,
5599       OMPBuilder.getOrCreateRuntimeFunction(
5600           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5601                                       : OMPRTL___kmpc_end_reduce),
5602       EndArgs);
5603   RCG.setAction(Action);
5604   RCG(CGF);
5605 
5606   CGF.EmitBranch(DefaultBB);
5607 
5608   // 7. Build case 2:
5609   //  ...
5610   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5611   //  ...
5612   // break;
5613   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5614   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5615   CGF.EmitBlock(Case2BB);
5616 
5617   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5618                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5619     const auto *ILHS = LHSExprs.begin();
5620     const auto *IRHS = RHSExprs.begin();
5621     const auto *IPriv = Privates.begin();
5622     for (const Expr *E : ReductionOps) {
5623       const Expr *XExpr = nullptr;
5624       const Expr *EExpr = nullptr;
5625       const Expr *UpExpr = nullptr;
5626       BinaryOperatorKind BO = BO_Comma;
5627       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5628         if (BO->getOpcode() == BO_Assign) {
5629           XExpr = BO->getLHS();
5630           UpExpr = BO->getRHS();
5631         }
5632       }
5633       // Try to emit update expression as a simple atomic.
5634       const Expr *RHSExpr = UpExpr;
5635       if (RHSExpr) {
5636         // Analyze RHS part of the whole expression.
5637         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5638                 RHSExpr->IgnoreParenImpCasts())) {
5639           // If this is a conditional operator, analyze its condition for
5640           // min/max reduction operator.
5641           RHSExpr = ACO->getCond();
5642         }
5643         if (const auto *BORHS =
5644                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5645           EExpr = BORHS->getRHS();
5646           BO = BORHS->getOpcode();
5647         }
5648       }
5649       if (XExpr) {
5650         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5651         auto &&AtomicRedGen = [BO, VD,
5652                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5653                                     const Expr *EExpr, const Expr *UpExpr) {
5654           LValue X = CGF.EmitLValue(XExpr);
5655           RValue E;
5656           if (EExpr)
5657             E = CGF.EmitAnyExpr(EExpr);
5658           CGF.EmitOMPAtomicSimpleUpdateExpr(
5659               X, E, BO, /*IsXLHSInRHSPart=*/true,
5660               llvm::AtomicOrdering::Monotonic, Loc,
5661               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5662                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5663                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5664                 CGF.emitOMPSimpleStore(
5665                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5666                     VD->getType().getNonReferenceType(), Loc);
5667                 PrivateScope.addPrivate(VD, LHSTemp);
5668                 (void)PrivateScope.Privatize();
5669                 return CGF.EmitAnyExpr(UpExpr);
5670               });
5671         };
5672         if ((*IPriv)->getType()->isArrayType()) {
5673           // Emit atomic reduction for array section.
5674           const auto *RHSVar =
5675               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5676           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5677                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5678         } else {
5679           // Emit atomic reduction for array subscript or single variable.
5680           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5681         }
5682       } else {
5683         // Emit as a critical region.
5684         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5685                                            const Expr *, const Expr *) {
5686           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5687           std::string Name = RT.getName({"atomic_reduction"});
5688           RT.emitCriticalRegion(
5689               CGF, Name,
5690               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5691                 Action.Enter(CGF);
5692                 emitReductionCombiner(CGF, E);
5693               },
5694               Loc);
5695         };
5696         if ((*IPriv)->getType()->isArrayType()) {
5697           const auto *LHSVar =
5698               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5699           const auto *RHSVar =
5700               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5701           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5702                                     CritRedGen);
5703         } else {
5704           CritRedGen(CGF, nullptr, nullptr, nullptr);
5705         }
5706       }
5707       ++ILHS;
5708       ++IRHS;
5709       ++IPriv;
5710     }
5711   };
5712   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5713   if (!WithNowait) {
5714     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5715     llvm::Value *EndArgs[] = {
5716         IdentTLoc, // ident_t *<loc>
5717         ThreadId,  // i32 <gtid>
5718         Lock       // kmp_critical_name *&<lock>
5719     };
5720     CommonActionTy Action(nullptr, llvm::None,
5721                           OMPBuilder.getOrCreateRuntimeFunction(
5722                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5723                           EndArgs);
5724     AtomicRCG.setAction(Action);
5725     AtomicRCG(CGF);
5726   } else {
5727     AtomicRCG(CGF);
5728   }
5729 
5730   CGF.EmitBranch(DefaultBB);
5731   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5732 }
5733 
5734 /// Generates unique name for artificial threadprivate variables.
5735 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5736 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5737                                       const Expr *Ref) {
5738   SmallString<256> Buffer;
5739   llvm::raw_svector_ostream Out(Buffer);
5740   const clang::DeclRefExpr *DE;
5741   const VarDecl *D = ::getBaseDecl(Ref, DE);
5742   if (!D)
5743     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5744   D = D->getCanonicalDecl();
5745   std::string Name = CGM.getOpenMPRuntime().getName(
5746       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5747   Out << Prefix << Name << "_"
5748       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5749   return std::string(Out.str());
5750 }
5751 
5752 /// Emits reduction initializer function:
5753 /// \code
5754 /// void @.red_init(void* %arg, void* %orig) {
5755 /// %0 = bitcast void* %arg to <type>*
5756 /// store <type> <init>, <type>* %0
5757 /// ret void
5758 /// }
5759 /// \endcode
5760 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5761                                            SourceLocation Loc,
5762                                            ReductionCodeGen &RCG, unsigned N) {
5763   ASTContext &C = CGM.getContext();
5764   QualType VoidPtrTy = C.VoidPtrTy;
5765   VoidPtrTy.addRestrict();
5766   FunctionArgList Args;
5767   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5768                           ImplicitParamDecl::Other);
5769   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5770                               ImplicitParamDecl::Other);
5771   Args.emplace_back(&Param);
5772   Args.emplace_back(&ParamOrig);
5773   const auto &FnInfo =
5774       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5775   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5776   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5777   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5778                                     Name, &CGM.getModule());
5779   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5780   Fn->setDoesNotRecurse();
5781   CodeGenFunction CGF(CGM);
5782   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5783   QualType PrivateType = RCG.getPrivateType(N);
5784   Address PrivateAddr = CGF.EmitLoadOfPointer(
5785       CGF.Builder.CreateElementBitCast(
5786           CGF.GetAddrOfLocalVar(&Param),
5787           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5788       C.getPointerType(PrivateType)->castAs<PointerType>());
5789   llvm::Value *Size = nullptr;
5790   // If the size of the reduction item is non-constant, load it from global
5791   // threadprivate variable.
5792   if (RCG.getSizes(N).second) {
5793     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5794         CGF, CGM.getContext().getSizeType(),
5795         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5796     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5797                                 CGM.getContext().getSizeType(), Loc);
5798   }
5799   RCG.emitAggregateType(CGF, N, Size);
5800   Address OrigAddr = Address::invalid();
5801   // If initializer uses initializer from declare reduction construct, emit a
5802   // pointer to the address of the original reduction item (reuired by reduction
5803   // initializer)
5804   if (RCG.usesReductionInitializer(N)) {
5805     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5806     OrigAddr = CGF.EmitLoadOfPointer(
5807         SharedAddr,
5808         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5809   }
5810   // Emit the initializer:
5811   // %0 = bitcast void* %arg to <type>*
5812   // store <type> <init>, <type>* %0
5813   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5814                          [](CodeGenFunction &) { return false; });
5815   CGF.FinishFunction();
5816   return Fn;
5817 }
5818 
5819 /// Emits reduction combiner function:
5820 /// \code
5821 /// void @.red_comb(void* %arg0, void* %arg1) {
5822 /// %lhs = bitcast void* %arg0 to <type>*
5823 /// %rhs = bitcast void* %arg1 to <type>*
5824 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5825 /// store <type> %2, <type>* %lhs
5826 /// ret void
5827 /// }
5828 /// \endcode
5829 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5830                                            SourceLocation Loc,
5831                                            ReductionCodeGen &RCG, unsigned N,
5832                                            const Expr *ReductionOp,
5833                                            const Expr *LHS, const Expr *RHS,
5834                                            const Expr *PrivateRef) {
5835   ASTContext &C = CGM.getContext();
5836   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5837   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5838   FunctionArgList Args;
5839   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5840                                C.VoidPtrTy, ImplicitParamDecl::Other);
5841   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5842                             ImplicitParamDecl::Other);
5843   Args.emplace_back(&ParamInOut);
5844   Args.emplace_back(&ParamIn);
5845   const auto &FnInfo =
5846       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5847   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5848   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5849   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5850                                     Name, &CGM.getModule());
5851   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5852   Fn->setDoesNotRecurse();
5853   CodeGenFunction CGF(CGM);
5854   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5855   llvm::Value *Size = nullptr;
5856   // If the size of the reduction item is non-constant, load it from global
5857   // threadprivate variable.
5858   if (RCG.getSizes(N).second) {
5859     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5860         CGF, CGM.getContext().getSizeType(),
5861         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5862     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5863                                 CGM.getContext().getSizeType(), Loc);
5864   }
5865   RCG.emitAggregateType(CGF, N, Size);
5866   // Remap lhs and rhs variables to the addresses of the function arguments.
5867   // %lhs = bitcast void* %arg0 to <type>*
5868   // %rhs = bitcast void* %arg1 to <type>*
5869   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5870   PrivateScope.addPrivate(
5871       LHSVD,
5872       // Pull out the pointer to the variable.
5873       CGF.EmitLoadOfPointer(
5874           CGF.Builder.CreateElementBitCast(
5875               CGF.GetAddrOfLocalVar(&ParamInOut),
5876               CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5877           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5878   PrivateScope.addPrivate(
5879       RHSVD,
5880       // Pull out the pointer to the variable.
5881       CGF.EmitLoadOfPointer(
5882           CGF.Builder.CreateElementBitCast(
5883             CGF.GetAddrOfLocalVar(&ParamIn),
5884             CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5885           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5886   PrivateScope.Privatize();
5887   // Emit the combiner body:
5888   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5889   // store <type> %2, <type>* %lhs
5890   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5891       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5892       cast<DeclRefExpr>(RHS));
5893   CGF.FinishFunction();
5894   return Fn;
5895 }
5896 
5897 /// Emits reduction finalizer function:
5898 /// \code
5899 /// void @.red_fini(void* %arg) {
5900 /// %0 = bitcast void* %arg to <type>*
5901 /// <destroy>(<type>* %0)
5902 /// ret void
5903 /// }
5904 /// \endcode
5905 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5906                                            SourceLocation Loc,
5907                                            ReductionCodeGen &RCG, unsigned N) {
5908   if (!RCG.needCleanups(N))
5909     return nullptr;
5910   ASTContext &C = CGM.getContext();
5911   FunctionArgList Args;
5912   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5913                           ImplicitParamDecl::Other);
5914   Args.emplace_back(&Param);
5915   const auto &FnInfo =
5916       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5917   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5918   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5919   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5920                                     Name, &CGM.getModule());
5921   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5922   Fn->setDoesNotRecurse();
5923   CodeGenFunction CGF(CGM);
5924   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5925   Address PrivateAddr = CGF.EmitLoadOfPointer(
5926       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5927   llvm::Value *Size = nullptr;
5928   // If the size of the reduction item is non-constant, load it from global
5929   // threadprivate variable.
5930   if (RCG.getSizes(N).second) {
5931     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5932         CGF, CGM.getContext().getSizeType(),
5933         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5934     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5935                                 CGM.getContext().getSizeType(), Loc);
5936   }
5937   RCG.emitAggregateType(CGF, N, Size);
5938   // Emit the finalizer body:
5939   // <destroy>(<type>* %0)
5940   RCG.emitCleanups(CGF, N, PrivateAddr);
5941   CGF.FinishFunction(Loc);
5942   return Fn;
5943 }
5944 
5945 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5946     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5947     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5948   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5949     return nullptr;
5950 
5951   // Build typedef struct:
5952   // kmp_taskred_input {
5953   //   void *reduce_shar; // shared reduction item
5954   //   void *reduce_orig; // original reduction item used for initialization
5955   //   size_t reduce_size; // size of data item
5956   //   void *reduce_init; // data initialization routine
5957   //   void *reduce_fini; // data finalization routine
5958   //   void *reduce_comb; // data combiner routine
5959   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5960   // } kmp_taskred_input_t;
5961   ASTContext &C = CGM.getContext();
5962   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5963   RD->startDefinition();
5964   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5965   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5966   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5967   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5968   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5969   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5970   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5971       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5972   RD->completeDefinition();
5973   QualType RDType = C.getRecordType(RD);
5974   unsigned Size = Data.ReductionVars.size();
5975   llvm::APInt ArraySize(/*numBits=*/64, Size);
5976   QualType ArrayRDType = C.getConstantArrayType(
5977       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5978   // kmp_task_red_input_t .rd_input.[Size];
5979   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5980   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5981                        Data.ReductionCopies, Data.ReductionOps);
5982   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5983     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5984     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5985                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5986     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5987         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5988         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5989         ".rd_input.gep.");
5990     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5991     // ElemLVal.reduce_shar = &Shareds[Cnt];
5992     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5993     RCG.emitSharedOrigLValue(CGF, Cnt);
5994     llvm::Value *CastedShared =
5995         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5996     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5997     // ElemLVal.reduce_orig = &Origs[Cnt];
5998     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5999     llvm::Value *CastedOrig =
6000         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6001     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6002     RCG.emitAggregateType(CGF, Cnt);
6003     llvm::Value *SizeValInChars;
6004     llvm::Value *SizeVal;
6005     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6006     // We use delayed creation/initialization for VLAs and array sections. It is
6007     // required because runtime does not provide the way to pass the sizes of
6008     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6009     // threadprivate global variables are used to store these values and use
6010     // them in the functions.
6011     bool DelayedCreation = !!SizeVal;
6012     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6013                                                /*isSigned=*/false);
6014     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6015     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6016     // ElemLVal.reduce_init = init;
6017     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6018     llvm::Value *InitAddr =
6019         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6020     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6021     // ElemLVal.reduce_fini = fini;
6022     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6023     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6024     llvm::Value *FiniAddr = Fini
6025                                 ? CGF.EmitCastToVoidPtr(Fini)
6026                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6027     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6028     // ElemLVal.reduce_comb = comb;
6029     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6030     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6031         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6032         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6033     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6034     // ElemLVal.flags = 0;
6035     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6036     if (DelayedCreation) {
6037       CGF.EmitStoreOfScalar(
6038           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6039           FlagsLVal);
6040     } else
6041       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6042                                  FlagsLVal.getType());
6043   }
6044   if (Data.IsReductionWithTaskMod) {
6045     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6046     // is_ws, int num, void *data);
6047     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6048     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6049                                                   CGM.IntTy, /*isSigned=*/true);
6050     llvm::Value *Args[] = {
6051         IdentTLoc, GTid,
6052         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6053                                /*isSigned=*/true),
6054         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6055         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6056             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6057     return CGF.EmitRuntimeCall(
6058         OMPBuilder.getOrCreateRuntimeFunction(
6059             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6060         Args);
6061   }
6062   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6063   llvm::Value *Args[] = {
6064       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6065                                 /*isSigned=*/true),
6066       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6067       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6068                                                       CGM.VoidPtrTy)};
6069   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6070                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6071                              Args);
6072 }
6073 
6074 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6075                                             SourceLocation Loc,
6076                                             bool IsWorksharingReduction) {
6077   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6078   // is_ws, int num, void *data);
6079   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6080   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6081                                                 CGM.IntTy, /*isSigned=*/true);
6082   llvm::Value *Args[] = {IdentTLoc, GTid,
6083                          llvm::ConstantInt::get(CGM.IntTy,
6084                                                 IsWorksharingReduction ? 1 : 0,
6085                                                 /*isSigned=*/true)};
6086   (void)CGF.EmitRuntimeCall(
6087       OMPBuilder.getOrCreateRuntimeFunction(
6088           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6089       Args);
6090 }
6091 
6092 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6093                                               SourceLocation Loc,
6094                                               ReductionCodeGen &RCG,
6095                                               unsigned N) {
6096   auto Sizes = RCG.getSizes(N);
6097   // Emit threadprivate global variable if the type is non-constant
6098   // (Sizes.second = nullptr).
6099   if (Sizes.second) {
6100     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6101                                                      /*isSigned=*/false);
6102     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6103         CGF, CGM.getContext().getSizeType(),
6104         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6105     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6106   }
6107 }
6108 
6109 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6110                                               SourceLocation Loc,
6111                                               llvm::Value *ReductionsPtr,
6112                                               LValue SharedLVal) {
6113   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6114   // *d);
6115   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6116                                                    CGM.IntTy,
6117                                                    /*isSigned=*/true),
6118                          ReductionsPtr,
6119                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6120                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6121   return Address(
6122       CGF.EmitRuntimeCall(
6123           OMPBuilder.getOrCreateRuntimeFunction(
6124               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6125           Args),
6126       CGF.Int8Ty, SharedLVal.getAlignment());
6127 }
6128 
6129 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6130                                        const OMPTaskDataTy &Data) {
6131   if (!CGF.HaveInsertPoint())
6132     return;
6133 
6134   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6135     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6136     OMPBuilder.createTaskwait(CGF.Builder);
6137   } else {
6138     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6139     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6140     auto &M = CGM.getModule();
6141     Address DependenciesArray = Address::invalid();
6142     llvm::Value *NumOfElements;
6143     std::tie(NumOfElements, DependenciesArray) =
6144         emitDependClause(CGF, Data.Dependences, Loc);
6145     llvm::Value *DepWaitTaskArgs[6];
6146     if (!Data.Dependences.empty()) {
6147       DepWaitTaskArgs[0] = UpLoc;
6148       DepWaitTaskArgs[1] = ThreadID;
6149       DepWaitTaskArgs[2] = NumOfElements;
6150       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6151       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6152       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6153 
6154       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6155 
6156       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6157       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6158       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6159       // is specified.
6160       CGF.EmitRuntimeCall(
6161           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6162           DepWaitTaskArgs);
6163 
6164     } else {
6165 
6166       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6167       // global_tid);
6168       llvm::Value *Args[] = {UpLoc, ThreadID};
6169       // Ignore return result until untied tasks are supported.
6170       CGF.EmitRuntimeCall(
6171           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6172           Args);
6173     }
6174   }
6175 
6176   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6177     Region->emitUntiedSwitch(CGF);
6178 }
6179 
6180 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6181                                            OpenMPDirectiveKind InnerKind,
6182                                            const RegionCodeGenTy &CodeGen,
6183                                            bool HasCancel) {
6184   if (!CGF.HaveInsertPoint())
6185     return;
6186   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6187                                  InnerKind != OMPD_critical &&
6188                                      InnerKind != OMPD_master &&
6189                                      InnerKind != OMPD_masked);
6190   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6191 }
6192 
6193 namespace {
6194 enum RTCancelKind {
6195   CancelNoreq = 0,
6196   CancelParallel = 1,
6197   CancelLoop = 2,
6198   CancelSections = 3,
6199   CancelTaskgroup = 4
6200 };
6201 } // anonymous namespace
6202 
6203 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6204   RTCancelKind CancelKind = CancelNoreq;
6205   if (CancelRegion == OMPD_parallel)
6206     CancelKind = CancelParallel;
6207   else if (CancelRegion == OMPD_for)
6208     CancelKind = CancelLoop;
6209   else if (CancelRegion == OMPD_sections)
6210     CancelKind = CancelSections;
6211   else {
6212     assert(CancelRegion == OMPD_taskgroup);
6213     CancelKind = CancelTaskgroup;
6214   }
6215   return CancelKind;
6216 }
6217 
6218 void CGOpenMPRuntime::emitCancellationPointCall(
6219     CodeGenFunction &CGF, SourceLocation Loc,
6220     OpenMPDirectiveKind CancelRegion) {
6221   if (!CGF.HaveInsertPoint())
6222     return;
6223   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6224   // global_tid, kmp_int32 cncl_kind);
6225   if (auto *OMPRegionInfo =
6226           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6227     // For 'cancellation point taskgroup', the task region info may not have a
6228     // cancel. This may instead happen in another adjacent task.
6229     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6230       llvm::Value *Args[] = {
6231           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6232           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6233       // Ignore return result until untied tasks are supported.
6234       llvm::Value *Result = CGF.EmitRuntimeCall(
6235           OMPBuilder.getOrCreateRuntimeFunction(
6236               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6237           Args);
6238       // if (__kmpc_cancellationpoint()) {
6239       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6240       //   exit from construct;
6241       // }
6242       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6243       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6244       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6245       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6246       CGF.EmitBlock(ExitBB);
6247       if (CancelRegion == OMPD_parallel)
6248         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6249       // exit from construct;
6250       CodeGenFunction::JumpDest CancelDest =
6251           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6252       CGF.EmitBranchThroughCleanup(CancelDest);
6253       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6254     }
6255   }
6256 }
6257 
6258 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6259                                      const Expr *IfCond,
6260                                      OpenMPDirectiveKind CancelRegion) {
6261   if (!CGF.HaveInsertPoint())
6262     return;
6263   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6264   // kmp_int32 cncl_kind);
6265   auto &M = CGM.getModule();
6266   if (auto *OMPRegionInfo =
6267           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6268     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6269                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6270       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6271       llvm::Value *Args[] = {
6272           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6273           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6274       // Ignore return result until untied tasks are supported.
6275       llvm::Value *Result = CGF.EmitRuntimeCall(
6276           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6277       // if (__kmpc_cancel()) {
6278       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6279       //   exit from construct;
6280       // }
6281       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6282       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6283       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6284       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6285       CGF.EmitBlock(ExitBB);
6286       if (CancelRegion == OMPD_parallel)
6287         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6288       // exit from construct;
6289       CodeGenFunction::JumpDest CancelDest =
6290           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6291       CGF.EmitBranchThroughCleanup(CancelDest);
6292       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6293     };
6294     if (IfCond) {
6295       emitIfClause(CGF, IfCond, ThenGen,
6296                    [](CodeGenFunction &, PrePostActionTy &) {});
6297     } else {
6298       RegionCodeGenTy ThenRCG(ThenGen);
6299       ThenRCG(CGF);
6300     }
6301   }
6302 }
6303 
6304 namespace {
6305 /// Cleanup action for uses_allocators support.
6306 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6307   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6308 
6309 public:
6310   OMPUsesAllocatorsActionTy(
6311       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6312       : Allocators(Allocators) {}
6313   void Enter(CodeGenFunction &CGF) override {
6314     if (!CGF.HaveInsertPoint())
6315       return;
6316     for (const auto &AllocatorData : Allocators) {
6317       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6318           CGF, AllocatorData.first, AllocatorData.second);
6319     }
6320   }
6321   void Exit(CodeGenFunction &CGF) override {
6322     if (!CGF.HaveInsertPoint())
6323       return;
6324     for (const auto &AllocatorData : Allocators) {
6325       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6326                                                         AllocatorData.first);
6327     }
6328   }
6329 };
6330 } // namespace
6331 
6332 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6333     const OMPExecutableDirective &D, StringRef ParentName,
6334     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6335     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6336   assert(!ParentName.empty() && "Invalid target region parent name!");
6337   HasEmittedTargetRegion = true;
6338   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6339   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6340     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6341       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6342       if (!D.AllocatorTraits)
6343         continue;
6344       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6345     }
6346   }
6347   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6348   CodeGen.setAction(UsesAllocatorAction);
6349   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6350                                    IsOffloadEntry, CodeGen);
6351 }
6352 
6353 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6354                                              const Expr *Allocator,
6355                                              const Expr *AllocatorTraits) {
6356   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6357   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6358   // Use default memspace handle.
6359   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6360   llvm::Value *NumTraits = llvm::ConstantInt::get(
6361       CGF.IntTy, cast<ConstantArrayType>(
6362                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6363                      ->getSize()
6364                      .getLimitedValue());
6365   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6366   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6367       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6368   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6369                                            AllocatorTraitsLVal.getBaseInfo(),
6370                                            AllocatorTraitsLVal.getTBAAInfo());
6371   llvm::Value *Traits =
6372       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6373 
6374   llvm::Value *AllocatorVal =
6375       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6376                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6377                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6378   // Store to allocator.
6379   CGF.EmitVarDecl(*cast<VarDecl>(
6380       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6381   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6382   AllocatorVal =
6383       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6384                                Allocator->getType(), Allocator->getExprLoc());
6385   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6386 }
6387 
6388 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6389                                              const Expr *Allocator) {
6390   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6391   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6392   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6393   llvm::Value *AllocatorVal =
6394       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6395   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6396                                           CGF.getContext().VoidPtrTy,
6397                                           Allocator->getExprLoc());
6398   (void)CGF.EmitRuntimeCall(
6399       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6400                                             OMPRTL___kmpc_destroy_allocator),
6401       {ThreadId, AllocatorVal});
6402 }
6403 
6404 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6405     const OMPExecutableDirective &D, StringRef ParentName,
6406     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6407     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6408   // Create a unique name for the entry function using the source location
6409   // information of the current target region. The name will be something like:
6410   //
6411   // __omp_offloading_DD_FFFF_PP_lBB
6412   //
6413   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6414   // mangled name of the function that encloses the target region and BB is the
6415   // line number of the target region.
6416 
6417   const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
6418                                !CGM.getLangOpts().OpenMPOffloadMandatory;
6419   unsigned DeviceID;
6420   unsigned FileID;
6421   unsigned Line;
6422   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6423                            Line);
6424   SmallString<64> EntryFnName;
6425   {
6426     llvm::raw_svector_ostream OS(EntryFnName);
6427     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6428        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6429   }
6430 
6431   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6432 
6433   CodeGenFunction CGF(CGM, true);
6434   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6435   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6436 
6437   if (BuildOutlinedFn)
6438     OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6439 
6440   // If this target outline function is not an offload entry, we don't need to
6441   // register it.
6442   if (!IsOffloadEntry)
6443     return;
6444 
6445   // The target region ID is used by the runtime library to identify the current
6446   // target region, so it only has to be unique and not necessarily point to
6447   // anything. It could be the pointer to the outlined function that implements
6448   // the target region, but we aren't using that so that the compiler doesn't
6449   // need to keep that, and could therefore inline the host function if proven
6450   // worthwhile during optimization. In the other hand, if emitting code for the
6451   // device, the ID has to be the function address so that it can retrieved from
6452   // the offloading entry and launched by the runtime library. We also mark the
6453   // outlined function to have external linkage in case we are emitting code for
6454   // the device, because these functions will be entry points to the device.
6455 
6456   if (CGM.getLangOpts().OpenMPIsDevice) {
6457     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6458     OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
6459     OutlinedFn->setDSOLocal(false);
6460     if (CGM.getTriple().isAMDGCN())
6461       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6462   } else {
6463     std::string Name = getName({EntryFnName, "region_id"});
6464     OutlinedFnID = new llvm::GlobalVariable(
6465         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6466         llvm::GlobalValue::WeakAnyLinkage,
6467         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6468   }
6469 
6470   // If we do not allow host fallback we still need a named address to use.
6471   llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
6472   if (!BuildOutlinedFn) {
6473     assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
6474            "Named kernel already exists?");
6475     TargetRegionEntryAddr = new llvm::GlobalVariable(
6476         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6477         llvm::GlobalValue::InternalLinkage,
6478         llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
6479   }
6480 
6481   // Register the information for the entry associated with this target region.
6482   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6483       DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
6484       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6485 
6486   // Add NumTeams and ThreadLimit attributes to the outlined GPU function
6487   int32_t DefaultValTeams = -1;
6488   getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6489   if (DefaultValTeams > 0 && OutlinedFn) {
6490     OutlinedFn->addFnAttr("omp_target_num_teams",
6491                           std::to_string(DefaultValTeams));
6492   }
6493   int32_t DefaultValThreads = -1;
6494   getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6495   if (DefaultValThreads > 0 && OutlinedFn) {
6496     OutlinedFn->addFnAttr("omp_target_thread_limit",
6497                           std::to_string(DefaultValThreads));
6498   }
6499 
6500   if (BuildOutlinedFn)
6501     CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6502 }
6503 
6504 /// Checks if the expression is constant or does not have non-trivial function
6505 /// calls.
6506 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6507   // We can skip constant expressions.
6508   // We can skip expressions with trivial calls or simple expressions.
6509   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6510           !E->hasNonTrivialCall(Ctx)) &&
6511          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6512 }
6513 
6514 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6515                                                     const Stmt *Body) {
6516   const Stmt *Child = Body->IgnoreContainers();
6517   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6518     Child = nullptr;
6519     for (const Stmt *S : C->body()) {
6520       if (const auto *E = dyn_cast<Expr>(S)) {
6521         if (isTrivial(Ctx, E))
6522           continue;
6523       }
6524       // Some of the statements can be ignored.
6525       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6526           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6527         continue;
6528       // Analyze declarations.
6529       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6530         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6531               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6532                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6533                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6534                   isa<UsingDirectiveDecl>(D) ||
6535                   isa<OMPDeclareReductionDecl>(D) ||
6536                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6537                 return true;
6538               const auto *VD = dyn_cast<VarDecl>(D);
6539               if (!VD)
6540                 return false;
6541               return VD->hasGlobalStorage() || !VD->isUsed();
6542             }))
6543           continue;
6544       }
6545       // Found multiple children - cannot get the one child only.
6546       if (Child)
6547         return nullptr;
6548       Child = S;
6549     }
6550     if (Child)
6551       Child = Child->IgnoreContainers();
6552   }
6553   return Child;
6554 }
6555 
6556 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6557     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6558     int32_t &DefaultVal) {
6559 
6560   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6561   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6562          "Expected target-based executable directive.");
6563   switch (DirectiveKind) {
6564   case OMPD_target: {
6565     const auto *CS = D.getInnermostCapturedStmt();
6566     const auto *Body =
6567         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6568     const Stmt *ChildStmt =
6569         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6570     if (const auto *NestedDir =
6571             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6572       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6573         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6574           const Expr *NumTeams =
6575               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6576           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6577             if (auto Constant =
6578                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6579               DefaultVal = Constant->getExtValue();
6580           return NumTeams;
6581         }
6582         DefaultVal = 0;
6583         return nullptr;
6584       }
6585       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6586           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6587         DefaultVal = 1;
6588         return nullptr;
6589       }
6590       DefaultVal = 1;
6591       return nullptr;
6592     }
6593     // A value of -1 is used to check if we need to emit no teams region
6594     DefaultVal = -1;
6595     return nullptr;
6596   }
6597   case OMPD_target_teams:
6598   case OMPD_target_teams_distribute:
6599   case OMPD_target_teams_distribute_simd:
6600   case OMPD_target_teams_distribute_parallel_for:
6601   case OMPD_target_teams_distribute_parallel_for_simd: {
6602     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6603       const Expr *NumTeams =
6604           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6605       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6606         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6607           DefaultVal = Constant->getExtValue();
6608       return NumTeams;
6609     }
6610     DefaultVal = 0;
6611     return nullptr;
6612   }
6613   case OMPD_target_parallel:
6614   case OMPD_target_parallel_for:
6615   case OMPD_target_parallel_for_simd:
6616   case OMPD_target_simd:
6617     DefaultVal = 1;
6618     return nullptr;
6619   case OMPD_parallel:
6620   case OMPD_for:
6621   case OMPD_parallel_for:
6622   case OMPD_parallel_master:
6623   case OMPD_parallel_sections:
6624   case OMPD_for_simd:
6625   case OMPD_parallel_for_simd:
6626   case OMPD_cancel:
6627   case OMPD_cancellation_point:
6628   case OMPD_ordered:
6629   case OMPD_threadprivate:
6630   case OMPD_allocate:
6631   case OMPD_task:
6632   case OMPD_simd:
6633   case OMPD_tile:
6634   case OMPD_unroll:
6635   case OMPD_sections:
6636   case OMPD_section:
6637   case OMPD_single:
6638   case OMPD_master:
6639   case OMPD_critical:
6640   case OMPD_taskyield:
6641   case OMPD_barrier:
6642   case OMPD_taskwait:
6643   case OMPD_taskgroup:
6644   case OMPD_atomic:
6645   case OMPD_flush:
6646   case OMPD_depobj:
6647   case OMPD_scan:
6648   case OMPD_teams:
6649   case OMPD_target_data:
6650   case OMPD_target_exit_data:
6651   case OMPD_target_enter_data:
6652   case OMPD_distribute:
6653   case OMPD_distribute_simd:
6654   case OMPD_distribute_parallel_for:
6655   case OMPD_distribute_parallel_for_simd:
6656   case OMPD_teams_distribute:
6657   case OMPD_teams_distribute_simd:
6658   case OMPD_teams_distribute_parallel_for:
6659   case OMPD_teams_distribute_parallel_for_simd:
6660   case OMPD_target_update:
6661   case OMPD_declare_simd:
6662   case OMPD_declare_variant:
6663   case OMPD_begin_declare_variant:
6664   case OMPD_end_declare_variant:
6665   case OMPD_declare_target:
6666   case OMPD_end_declare_target:
6667   case OMPD_declare_reduction:
6668   case OMPD_declare_mapper:
6669   case OMPD_taskloop:
6670   case OMPD_taskloop_simd:
6671   case OMPD_master_taskloop:
6672   case OMPD_master_taskloop_simd:
6673   case OMPD_parallel_master_taskloop:
6674   case OMPD_parallel_master_taskloop_simd:
6675   case OMPD_requires:
6676   case OMPD_metadirective:
6677   case OMPD_unknown:
6678     break;
6679   default:
6680     break;
6681   }
6682   llvm_unreachable("Unexpected directive kind.");
6683 }
6684 
6685 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6686     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6687   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6688          "Clauses associated with the teams directive expected to be emitted "
6689          "only for the host!");
6690   CGBuilderTy &Bld = CGF.Builder;
6691   int32_t DefaultNT = -1;
6692   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6693   if (NumTeams != nullptr) {
6694     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6695 
6696     switch (DirectiveKind) {
6697     case OMPD_target: {
6698       const auto *CS = D.getInnermostCapturedStmt();
6699       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6700       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6701       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6702                                                   /*IgnoreResultAssign*/ true);
6703       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6704                              /*isSigned=*/true);
6705     }
6706     case OMPD_target_teams:
6707     case OMPD_target_teams_distribute:
6708     case OMPD_target_teams_distribute_simd:
6709     case OMPD_target_teams_distribute_parallel_for:
6710     case OMPD_target_teams_distribute_parallel_for_simd: {
6711       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6712       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6713                                                   /*IgnoreResultAssign*/ true);
6714       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6715                              /*isSigned=*/true);
6716     }
6717     default:
6718       break;
6719     }
6720   }
6721 
6722   return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
6723 }
6724 
6725 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6726                                   llvm::Value *DefaultThreadLimitVal) {
6727   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6728       CGF.getContext(), CS->getCapturedStmt());
6729   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6730     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6731       llvm::Value *NumThreads = nullptr;
6732       llvm::Value *CondVal = nullptr;
6733       // Handle if clause. If if clause present, the number of threads is
6734       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6735       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6736         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6737         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6738         const OMPIfClause *IfClause = nullptr;
6739         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6740           if (C->getNameModifier() == OMPD_unknown ||
6741               C->getNameModifier() == OMPD_parallel) {
6742             IfClause = C;
6743             break;
6744           }
6745         }
6746         if (IfClause) {
6747           const Expr *Cond = IfClause->getCondition();
6748           bool Result;
6749           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6750             if (!Result)
6751               return CGF.Builder.getInt32(1);
6752           } else {
6753             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6754             if (const auto *PreInit =
6755                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6756               for (const auto *I : PreInit->decls()) {
6757                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6758                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6759                 } else {
6760                   CodeGenFunction::AutoVarEmission Emission =
6761                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6762                   CGF.EmitAutoVarCleanups(Emission);
6763                 }
6764               }
6765             }
6766             CondVal = CGF.EvaluateExprAsBool(Cond);
6767           }
6768         }
6769       }
6770       // Check the value of num_threads clause iff if clause was not specified
6771       // or is not evaluated to false.
6772       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6773         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6774         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6775         const auto *NumThreadsClause =
6776             Dir->getSingleClause<OMPNumThreadsClause>();
6777         CodeGenFunction::LexicalScope Scope(
6778             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6779         if (const auto *PreInit =
6780                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6781           for (const auto *I : PreInit->decls()) {
6782             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6783               CGF.EmitVarDecl(cast<VarDecl>(*I));
6784             } else {
6785               CodeGenFunction::AutoVarEmission Emission =
6786                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6787               CGF.EmitAutoVarCleanups(Emission);
6788             }
6789           }
6790         }
6791         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6792         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6793                                                /*isSigned=*/false);
6794         if (DefaultThreadLimitVal)
6795           NumThreads = CGF.Builder.CreateSelect(
6796               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6797               DefaultThreadLimitVal, NumThreads);
6798       } else {
6799         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6800                                            : CGF.Builder.getInt32(0);
6801       }
6802       // Process condition of the if clause.
6803       if (CondVal) {
6804         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6805                                               CGF.Builder.getInt32(1));
6806       }
6807       return NumThreads;
6808     }
6809     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6810       return CGF.Builder.getInt32(1);
6811     return DefaultThreadLimitVal;
6812   }
6813   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6814                                : CGF.Builder.getInt32(0);
6815 }
6816 
6817 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6818     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6819     int32_t &DefaultVal) {
6820   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6821   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6822          "Expected target-based executable directive.");
6823 
6824   switch (DirectiveKind) {
6825   case OMPD_target:
6826     // Teams have no clause thread_limit
6827     return nullptr;
6828   case OMPD_target_teams:
6829   case OMPD_target_teams_distribute:
6830     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6831       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6832       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6833       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6834         if (auto Constant =
6835                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6836           DefaultVal = Constant->getExtValue();
6837       return ThreadLimit;
6838     }
6839     return nullptr;
6840   case OMPD_target_parallel:
6841   case OMPD_target_parallel_for:
6842   case OMPD_target_parallel_for_simd:
6843   case OMPD_target_teams_distribute_parallel_for:
6844   case OMPD_target_teams_distribute_parallel_for_simd: {
6845     Expr *ThreadLimit = nullptr;
6846     Expr *NumThreads = nullptr;
6847     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6848       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6849       ThreadLimit = ThreadLimitClause->getThreadLimit();
6850       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6851         if (auto Constant =
6852                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6853           DefaultVal = Constant->getExtValue();
6854     }
6855     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6856       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6857       NumThreads = NumThreadsClause->getNumThreads();
6858       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6859         if (auto Constant =
6860                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6861           if (Constant->getExtValue() < DefaultVal) {
6862             DefaultVal = Constant->getExtValue();
6863             ThreadLimit = NumThreads;
6864           }
6865         }
6866       }
6867     }
6868     return ThreadLimit;
6869   }
6870   case OMPD_target_teams_distribute_simd:
6871   case OMPD_target_simd:
6872     DefaultVal = 1;
6873     return nullptr;
6874   case OMPD_parallel:
6875   case OMPD_for:
6876   case OMPD_parallel_for:
6877   case OMPD_parallel_master:
6878   case OMPD_parallel_sections:
6879   case OMPD_for_simd:
6880   case OMPD_parallel_for_simd:
6881   case OMPD_cancel:
6882   case OMPD_cancellation_point:
6883   case OMPD_ordered:
6884   case OMPD_threadprivate:
6885   case OMPD_allocate:
6886   case OMPD_task:
6887   case OMPD_simd:
6888   case OMPD_tile:
6889   case OMPD_unroll:
6890   case OMPD_sections:
6891   case OMPD_section:
6892   case OMPD_single:
6893   case OMPD_master:
6894   case OMPD_critical:
6895   case OMPD_taskyield:
6896   case OMPD_barrier:
6897   case OMPD_taskwait:
6898   case OMPD_taskgroup:
6899   case OMPD_atomic:
6900   case OMPD_flush:
6901   case OMPD_depobj:
6902   case OMPD_scan:
6903   case OMPD_teams:
6904   case OMPD_target_data:
6905   case OMPD_target_exit_data:
6906   case OMPD_target_enter_data:
6907   case OMPD_distribute:
6908   case OMPD_distribute_simd:
6909   case OMPD_distribute_parallel_for:
6910   case OMPD_distribute_parallel_for_simd:
6911   case OMPD_teams_distribute:
6912   case OMPD_teams_distribute_simd:
6913   case OMPD_teams_distribute_parallel_for:
6914   case OMPD_teams_distribute_parallel_for_simd:
6915   case OMPD_target_update:
6916   case OMPD_declare_simd:
6917   case OMPD_declare_variant:
6918   case OMPD_begin_declare_variant:
6919   case OMPD_end_declare_variant:
6920   case OMPD_declare_target:
6921   case OMPD_end_declare_target:
6922   case OMPD_declare_reduction:
6923   case OMPD_declare_mapper:
6924   case OMPD_taskloop:
6925   case OMPD_taskloop_simd:
6926   case OMPD_master_taskloop:
6927   case OMPD_master_taskloop_simd:
6928   case OMPD_parallel_master_taskloop:
6929   case OMPD_parallel_master_taskloop_simd:
6930   case OMPD_requires:
6931   case OMPD_unknown:
6932     break;
6933   default:
6934     break;
6935   }
6936   llvm_unreachable("Unsupported directive kind.");
6937 }
6938 
6939 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6940     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6941   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6942          "Clauses associated with the teams directive expected to be emitted "
6943          "only for the host!");
6944   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6945   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6946          "Expected target-based executable directive.");
6947   CGBuilderTy &Bld = CGF.Builder;
6948   llvm::Value *ThreadLimitVal = nullptr;
6949   llvm::Value *NumThreadsVal = nullptr;
6950   switch (DirectiveKind) {
6951   case OMPD_target: {
6952     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6953     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6954       return NumThreads;
6955     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6956         CGF.getContext(), CS->getCapturedStmt());
6957     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6958       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6959         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6960         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6961         const auto *ThreadLimitClause =
6962             Dir->getSingleClause<OMPThreadLimitClause>();
6963         CodeGenFunction::LexicalScope Scope(
6964             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6965         if (const auto *PreInit =
6966                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6967           for (const auto *I : PreInit->decls()) {
6968             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6969               CGF.EmitVarDecl(cast<VarDecl>(*I));
6970             } else {
6971               CodeGenFunction::AutoVarEmission Emission =
6972                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6973               CGF.EmitAutoVarCleanups(Emission);
6974             }
6975           }
6976         }
6977         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6978             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6979         ThreadLimitVal =
6980             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6981       }
6982       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6983           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6984         CS = Dir->getInnermostCapturedStmt();
6985         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6986             CGF.getContext(), CS->getCapturedStmt());
6987         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6988       }
6989       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6990           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6991         CS = Dir->getInnermostCapturedStmt();
6992         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6993           return NumThreads;
6994       }
6995       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6996         return Bld.getInt32(1);
6997     }
6998     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6999   }
7000   case OMPD_target_teams: {
7001     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7002       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7003       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7004       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7005           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7006       ThreadLimitVal =
7007           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7008     }
7009     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7010     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7011       return NumThreads;
7012     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7013         CGF.getContext(), CS->getCapturedStmt());
7014     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7015       if (Dir->getDirectiveKind() == OMPD_distribute) {
7016         CS = Dir->getInnermostCapturedStmt();
7017         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7018           return NumThreads;
7019       }
7020     }
7021     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7022   }
7023   case OMPD_target_teams_distribute:
7024     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7025       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7026       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7027       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7028           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7029       ThreadLimitVal =
7030           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7031     }
7032     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7033   case OMPD_target_parallel:
7034   case OMPD_target_parallel_for:
7035   case OMPD_target_parallel_for_simd:
7036   case OMPD_target_teams_distribute_parallel_for:
7037   case OMPD_target_teams_distribute_parallel_for_simd: {
7038     llvm::Value *CondVal = nullptr;
7039     // Handle if clause. If if clause present, the number of threads is
7040     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7041     if (D.hasClausesOfKind<OMPIfClause>()) {
7042       const OMPIfClause *IfClause = nullptr;
7043       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7044         if (C->getNameModifier() == OMPD_unknown ||
7045             C->getNameModifier() == OMPD_parallel) {
7046           IfClause = C;
7047           break;
7048         }
7049       }
7050       if (IfClause) {
7051         const Expr *Cond = IfClause->getCondition();
7052         bool Result;
7053         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7054           if (!Result)
7055             return Bld.getInt32(1);
7056         } else {
7057           CodeGenFunction::RunCleanupsScope Scope(CGF);
7058           CondVal = CGF.EvaluateExprAsBool(Cond);
7059         }
7060       }
7061     }
7062     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7063       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7064       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7065       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7066           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7067       ThreadLimitVal =
7068           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7069     }
7070     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7071       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7072       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7073       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7074           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7075       NumThreadsVal =
7076           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7077       ThreadLimitVal = ThreadLimitVal
7078                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7079                                                                 ThreadLimitVal),
7080                                               NumThreadsVal, ThreadLimitVal)
7081                            : NumThreadsVal;
7082     }
7083     if (!ThreadLimitVal)
7084       ThreadLimitVal = Bld.getInt32(0);
7085     if (CondVal)
7086       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7087     return ThreadLimitVal;
7088   }
7089   case OMPD_target_teams_distribute_simd:
7090   case OMPD_target_simd:
7091     return Bld.getInt32(1);
7092   case OMPD_parallel:
7093   case OMPD_for:
7094   case OMPD_parallel_for:
7095   case OMPD_parallel_master:
7096   case OMPD_parallel_sections:
7097   case OMPD_for_simd:
7098   case OMPD_parallel_for_simd:
7099   case OMPD_cancel:
7100   case OMPD_cancellation_point:
7101   case OMPD_ordered:
7102   case OMPD_threadprivate:
7103   case OMPD_allocate:
7104   case OMPD_task:
7105   case OMPD_simd:
7106   case OMPD_tile:
7107   case OMPD_unroll:
7108   case OMPD_sections:
7109   case OMPD_section:
7110   case OMPD_single:
7111   case OMPD_master:
7112   case OMPD_critical:
7113   case OMPD_taskyield:
7114   case OMPD_barrier:
7115   case OMPD_taskwait:
7116   case OMPD_taskgroup:
7117   case OMPD_atomic:
7118   case OMPD_flush:
7119   case OMPD_depobj:
7120   case OMPD_scan:
7121   case OMPD_teams:
7122   case OMPD_target_data:
7123   case OMPD_target_exit_data:
7124   case OMPD_target_enter_data:
7125   case OMPD_distribute:
7126   case OMPD_distribute_simd:
7127   case OMPD_distribute_parallel_for:
7128   case OMPD_distribute_parallel_for_simd:
7129   case OMPD_teams_distribute:
7130   case OMPD_teams_distribute_simd:
7131   case OMPD_teams_distribute_parallel_for:
7132   case OMPD_teams_distribute_parallel_for_simd:
7133   case OMPD_target_update:
7134   case OMPD_declare_simd:
7135   case OMPD_declare_variant:
7136   case OMPD_begin_declare_variant:
7137   case OMPD_end_declare_variant:
7138   case OMPD_declare_target:
7139   case OMPD_end_declare_target:
7140   case OMPD_declare_reduction:
7141   case OMPD_declare_mapper:
7142   case OMPD_taskloop:
7143   case OMPD_taskloop_simd:
7144   case OMPD_master_taskloop:
7145   case OMPD_master_taskloop_simd:
7146   case OMPD_parallel_master_taskloop:
7147   case OMPD_parallel_master_taskloop_simd:
7148   case OMPD_requires:
7149   case OMPD_metadirective:
7150   case OMPD_unknown:
7151     break;
7152   default:
7153     break;
7154   }
7155   llvm_unreachable("Unsupported directive kind.");
7156 }
7157 
7158 namespace {
7159 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7160 
7161 // Utility to handle information from clauses associated with a given
7162 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7163 // It provides a convenient interface to obtain the information and generate
7164 // code for that information.
7165 class MappableExprsHandler {
7166 public:
7167   /// Values for bit flags used to specify the mapping type for
7168   /// offloading.
7169   enum OpenMPOffloadMappingFlags : uint64_t {
7170     /// No flags
7171     OMP_MAP_NONE = 0x0,
7172     /// Allocate memory on the device and move data from host to device.
7173     OMP_MAP_TO = 0x01,
7174     /// Allocate memory on the device and move data from device to host.
7175     OMP_MAP_FROM = 0x02,
7176     /// Always perform the requested mapping action on the element, even
7177     /// if it was already mapped before.
7178     OMP_MAP_ALWAYS = 0x04,
7179     /// Delete the element from the device environment, ignoring the
7180     /// current reference count associated with the element.
7181     OMP_MAP_DELETE = 0x08,
7182     /// The element being mapped is a pointer-pointee pair; both the
7183     /// pointer and the pointee should be mapped.
7184     OMP_MAP_PTR_AND_OBJ = 0x10,
7185     /// This flags signals that the base address of an entry should be
7186     /// passed to the target kernel as an argument.
7187     OMP_MAP_TARGET_PARAM = 0x20,
7188     /// Signal that the runtime library has to return the device pointer
7189     /// in the current position for the data being mapped. Used when we have the
7190     /// use_device_ptr or use_device_addr clause.
7191     OMP_MAP_RETURN_PARAM = 0x40,
7192     /// This flag signals that the reference being passed is a pointer to
7193     /// private data.
7194     OMP_MAP_PRIVATE = 0x80,
7195     /// Pass the element to the device by value.
7196     OMP_MAP_LITERAL = 0x100,
7197     /// Implicit map
7198     OMP_MAP_IMPLICIT = 0x200,
7199     /// Close is a hint to the runtime to allocate memory close to
7200     /// the target device.
7201     OMP_MAP_CLOSE = 0x400,
7202     /// 0x800 is reserved for compatibility with XLC.
7203     /// Produce a runtime error if the data is not already allocated.
7204     OMP_MAP_PRESENT = 0x1000,
7205     // Increment and decrement a separate reference counter so that the data
7206     // cannot be unmapped within the associated region.  Thus, this flag is
7207     // intended to be used on 'target' and 'target data' directives because they
7208     // are inherently structured.  It is not intended to be used on 'target
7209     // enter data' and 'target exit data' directives because they are inherently
7210     // dynamic.
7211     // This is an OpenMP extension for the sake of OpenACC support.
7212     OMP_MAP_OMPX_HOLD = 0x2000,
7213     /// Signal that the runtime library should use args as an array of
7214     /// descriptor_dim pointers and use args_size as dims. Used when we have
7215     /// non-contiguous list items in target update directive
7216     OMP_MAP_NON_CONTIG = 0x100000000000,
7217     /// The 16 MSBs of the flags indicate whether the entry is member of some
7218     /// struct/class.
7219     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7220     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7221   };
7222 
7223   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7224   static unsigned getFlagMemberOffset() {
7225     unsigned Offset = 0;
7226     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7227          Remain = Remain >> 1)
7228       Offset++;
7229     return Offset;
7230   }
7231 
7232   /// Class that holds debugging information for a data mapping to be passed to
7233   /// the runtime library.
7234   class MappingExprInfo {
7235     /// The variable declaration used for the data mapping.
7236     const ValueDecl *MapDecl = nullptr;
7237     /// The original expression used in the map clause, or null if there is
7238     /// none.
7239     const Expr *MapExpr = nullptr;
7240 
7241   public:
7242     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7243         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7244 
7245     const ValueDecl *getMapDecl() const { return MapDecl; }
7246     const Expr *getMapExpr() const { return MapExpr; }
7247   };
7248 
7249   /// Class that associates information with a base pointer to be passed to the
7250   /// runtime library.
7251   class BasePointerInfo {
7252     /// The base pointer.
7253     llvm::Value *Ptr = nullptr;
7254     /// The base declaration that refers to this device pointer, or null if
7255     /// there is none.
7256     const ValueDecl *DevPtrDecl = nullptr;
7257 
7258   public:
7259     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7260         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7261     llvm::Value *operator*() const { return Ptr; }
7262     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7263     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7264   };
7265 
7266   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7267   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7268   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7269   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7270   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7271   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7272   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7273 
7274   /// This structure contains combined information generated for mappable
7275   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7276   /// mappers, and non-contiguous information.
7277   struct MapCombinedInfoTy {
7278     struct StructNonContiguousInfo {
7279       bool IsNonContiguous = false;
7280       MapDimArrayTy Dims;
7281       MapNonContiguousArrayTy Offsets;
7282       MapNonContiguousArrayTy Counts;
7283       MapNonContiguousArrayTy Strides;
7284     };
7285     MapExprsArrayTy Exprs;
7286     MapBaseValuesArrayTy BasePointers;
7287     MapValuesArrayTy Pointers;
7288     MapValuesArrayTy Sizes;
7289     MapFlagsArrayTy Types;
7290     MapMappersArrayTy Mappers;
7291     StructNonContiguousInfo NonContigInfo;
7292 
7293     /// Append arrays in \a CurInfo.
7294     void append(MapCombinedInfoTy &CurInfo) {
7295       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7296       BasePointers.append(CurInfo.BasePointers.begin(),
7297                           CurInfo.BasePointers.end());
7298       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7299       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7300       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7301       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7302       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7303                                  CurInfo.NonContigInfo.Dims.end());
7304       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7305                                     CurInfo.NonContigInfo.Offsets.end());
7306       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7307                                    CurInfo.NonContigInfo.Counts.end());
7308       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7309                                     CurInfo.NonContigInfo.Strides.end());
7310     }
7311   };
7312 
7313   /// Map between a struct and the its lowest & highest elements which have been
7314   /// mapped.
7315   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7316   ///                    HE(FieldIndex, Pointer)}
7317   struct StructRangeInfoTy {
7318     MapCombinedInfoTy PreliminaryMapData;
7319     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7320         0, Address::invalid()};
7321     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7322         0, Address::invalid()};
7323     Address Base = Address::invalid();
7324     Address LB = Address::invalid();
7325     bool IsArraySection = false;
7326     bool HasCompleteRecord = false;
7327   };
7328 
7329 private:
7330   /// Kind that defines how a device pointer has to be returned.
7331   struct MapInfo {
7332     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7333     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7334     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7335     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7336     bool ReturnDevicePointer = false;
7337     bool IsImplicit = false;
7338     const ValueDecl *Mapper = nullptr;
7339     const Expr *VarRef = nullptr;
7340     bool ForDeviceAddr = false;
7341 
7342     MapInfo() = default;
7343     MapInfo(
7344         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7345         OpenMPMapClauseKind MapType,
7346         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7347         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7348         bool ReturnDevicePointer, bool IsImplicit,
7349         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7350         bool ForDeviceAddr = false)
7351         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7352           MotionModifiers(MotionModifiers),
7353           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7354           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7355   };
7356 
7357   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7358   /// member and there is no map information about it, then emission of that
7359   /// entry is deferred until the whole struct has been processed.
7360   struct DeferredDevicePtrEntryTy {
7361     const Expr *IE = nullptr;
7362     const ValueDecl *VD = nullptr;
7363     bool ForDeviceAddr = false;
7364 
7365     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7366                              bool ForDeviceAddr)
7367         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7368   };
7369 
7370   /// The target directive from where the mappable clauses were extracted. It
7371   /// is either a executable directive or a user-defined mapper directive.
7372   llvm::PointerUnion<const OMPExecutableDirective *,
7373                      const OMPDeclareMapperDecl *>
7374       CurDir;
7375 
7376   /// Function the directive is being generated for.
7377   CodeGenFunction &CGF;
7378 
7379   /// Set of all first private variables in the current directive.
7380   /// bool data is set to true if the variable is implicitly marked as
7381   /// firstprivate, false otherwise.
7382   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7383 
7384   /// Map between device pointer declarations and their expression components.
7385   /// The key value for declarations in 'this' is null.
7386   llvm::DenseMap<
7387       const ValueDecl *,
7388       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7389       DevPointersMap;
7390 
7391   /// Map between lambda declarations and their map type.
7392   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7393 
7394   llvm::Value *getExprTypeSize(const Expr *E) const {
7395     QualType ExprTy = E->getType().getCanonicalType();
7396 
7397     // Calculate the size for array shaping expression.
7398     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7399       llvm::Value *Size =
7400           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7401       for (const Expr *SE : OAE->getDimensions()) {
7402         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7403         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7404                                       CGF.getContext().getSizeType(),
7405                                       SE->getExprLoc());
7406         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7407       }
7408       return Size;
7409     }
7410 
7411     // Reference types are ignored for mapping purposes.
7412     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7413       ExprTy = RefTy->getPointeeType().getCanonicalType();
7414 
7415     // Given that an array section is considered a built-in type, we need to
7416     // do the calculation based on the length of the section instead of relying
7417     // on CGF.getTypeSize(E->getType()).
7418     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7419       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7420                             OAE->getBase()->IgnoreParenImpCasts())
7421                             .getCanonicalType();
7422 
7423       // If there is no length associated with the expression and lower bound is
7424       // not specified too, that means we are using the whole length of the
7425       // base.
7426       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7427           !OAE->getLowerBound())
7428         return CGF.getTypeSize(BaseTy);
7429 
7430       llvm::Value *ElemSize;
7431       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7432         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7433       } else {
7434         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7435         assert(ATy && "Expecting array type if not a pointer type.");
7436         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7437       }
7438 
7439       // If we don't have a length at this point, that is because we have an
7440       // array section with a single element.
7441       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7442         return ElemSize;
7443 
7444       if (const Expr *LenExpr = OAE->getLength()) {
7445         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7446         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7447                                              CGF.getContext().getSizeType(),
7448                                              LenExpr->getExprLoc());
7449         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7450       }
7451       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7452              OAE->getLowerBound() && "expected array_section[lb:].");
7453       // Size = sizetype - lb * elemtype;
7454       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7455       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7456       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7457                                        CGF.getContext().getSizeType(),
7458                                        OAE->getLowerBound()->getExprLoc());
7459       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7460       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7461       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7462       LengthVal = CGF.Builder.CreateSelect(
7463           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7464       return LengthVal;
7465     }
7466     return CGF.getTypeSize(ExprTy);
7467   }
7468 
7469   /// Return the corresponding bits for a given map clause modifier. Add
7470   /// a flag marking the map as a pointer if requested. Add a flag marking the
7471   /// map as the first one of a series of maps that relate to the same map
7472   /// expression.
7473   OpenMPOffloadMappingFlags getMapTypeBits(
7474       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7475       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7476       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7477     OpenMPOffloadMappingFlags Bits =
7478         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7479     switch (MapType) {
7480     case OMPC_MAP_alloc:
7481     case OMPC_MAP_release:
7482       // alloc and release is the default behavior in the runtime library,  i.e.
7483       // if we don't pass any bits alloc/release that is what the runtime is
7484       // going to do. Therefore, we don't need to signal anything for these two
7485       // type modifiers.
7486       break;
7487     case OMPC_MAP_to:
7488       Bits |= OMP_MAP_TO;
7489       break;
7490     case OMPC_MAP_from:
7491       Bits |= OMP_MAP_FROM;
7492       break;
7493     case OMPC_MAP_tofrom:
7494       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7495       break;
7496     case OMPC_MAP_delete:
7497       Bits |= OMP_MAP_DELETE;
7498       break;
7499     case OMPC_MAP_unknown:
7500       llvm_unreachable("Unexpected map type!");
7501     }
7502     if (AddPtrFlag)
7503       Bits |= OMP_MAP_PTR_AND_OBJ;
7504     if (AddIsTargetParamFlag)
7505       Bits |= OMP_MAP_TARGET_PARAM;
7506     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7507       Bits |= OMP_MAP_ALWAYS;
7508     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7509       Bits |= OMP_MAP_CLOSE;
7510     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7511         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7512       Bits |= OMP_MAP_PRESENT;
7513     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7514       Bits |= OMP_MAP_OMPX_HOLD;
7515     if (IsNonContiguous)
7516       Bits |= OMP_MAP_NON_CONTIG;
7517     return Bits;
7518   }
7519 
7520   /// Return true if the provided expression is a final array section. A
7521   /// final array section, is one whose length can't be proved to be one.
7522   bool isFinalArraySectionExpression(const Expr *E) const {
7523     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7524 
7525     // It is not an array section and therefore not a unity-size one.
7526     if (!OASE)
7527       return false;
7528 
7529     // An array section with no colon always refer to a single element.
7530     if (OASE->getColonLocFirst().isInvalid())
7531       return false;
7532 
7533     const Expr *Length = OASE->getLength();
7534 
7535     // If we don't have a length we have to check if the array has size 1
7536     // for this dimension. Also, we should always expect a length if the
7537     // base type is pointer.
7538     if (!Length) {
7539       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7540                              OASE->getBase()->IgnoreParenImpCasts())
7541                              .getCanonicalType();
7542       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7543         return ATy->getSize().getSExtValue() != 1;
7544       // If we don't have a constant dimension length, we have to consider
7545       // the current section as having any size, so it is not necessarily
7546       // unitary. If it happen to be unity size, that's user fault.
7547       return true;
7548     }
7549 
7550     // Check if the length evaluates to 1.
7551     Expr::EvalResult Result;
7552     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7553       return true; // Can have more that size 1.
7554 
7555     llvm::APSInt ConstLength = Result.Val.getInt();
7556     return ConstLength.getSExtValue() != 1;
7557   }
7558 
7559   /// Generate the base pointers, section pointers, sizes, map type bits, and
7560   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7561   /// map type, map or motion modifiers, and expression components.
7562   /// \a IsFirstComponent should be set to true if the provided set of
7563   /// components is the first associated with a capture.
7564   void generateInfoForComponentList(
7565       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7566       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7567       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7568       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7569       bool IsFirstComponentList, bool IsImplicit,
7570       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7571       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7572       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7573           OverlappedElements = llvm::None) const {
7574     // The following summarizes what has to be generated for each map and the
7575     // types below. The generated information is expressed in this order:
7576     // base pointer, section pointer, size, flags
7577     // (to add to the ones that come from the map type and modifier).
7578     //
7579     // double d;
7580     // int i[100];
7581     // float *p;
7582     //
7583     // struct S1 {
7584     //   int i;
7585     //   float f[50];
7586     // }
7587     // struct S2 {
7588     //   int i;
7589     //   float f[50];
7590     //   S1 s;
7591     //   double *p;
7592     //   struct S2 *ps;
7593     //   int &ref;
7594     // }
7595     // S2 s;
7596     // S2 *ps;
7597     //
7598     // map(d)
7599     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7600     //
7601     // map(i)
7602     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7603     //
7604     // map(i[1:23])
7605     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7606     //
7607     // map(p)
7608     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7609     //
7610     // map(p[1:24])
7611     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7612     // in unified shared memory mode or for local pointers
7613     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7614     //
7615     // map(s)
7616     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7617     //
7618     // map(s.i)
7619     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7620     //
7621     // map(s.s.f)
7622     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7623     //
7624     // map(s.p)
7625     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7626     //
7627     // map(to: s.p[:22])
7628     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7629     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7630     // &(s.p), &(s.p[0]), 22*sizeof(double),
7631     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7632     // (*) alloc space for struct members, only this is a target parameter
7633     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7634     //      optimizes this entry out, same in the examples below)
7635     // (***) map the pointee (map: to)
7636     //
7637     // map(to: s.ref)
7638     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7639     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7640     // (*) alloc space for struct members, only this is a target parameter
7641     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7642     //      optimizes this entry out, same in the examples below)
7643     // (***) map the pointee (map: to)
7644     //
7645     // map(s.ps)
7646     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7647     //
7648     // map(from: s.ps->s.i)
7649     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7650     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7651     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7652     //
7653     // map(to: s.ps->ps)
7654     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7655     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7656     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7657     //
7658     // map(s.ps->ps->ps)
7659     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7660     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7661     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7662     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7663     //
7664     // map(to: s.ps->ps->s.f[:22])
7665     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7666     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7667     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7668     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7669     //
7670     // map(ps)
7671     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7672     //
7673     // map(ps->i)
7674     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7675     //
7676     // map(ps->s.f)
7677     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7678     //
7679     // map(from: ps->p)
7680     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7681     //
7682     // map(to: ps->p[:22])
7683     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7684     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7685     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7686     //
7687     // map(ps->ps)
7688     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7689     //
7690     // map(from: ps->ps->s.i)
7691     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7692     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7693     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7694     //
7695     // map(from: ps->ps->ps)
7696     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7697     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7698     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7699     //
7700     // map(ps->ps->ps->ps)
7701     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7702     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7703     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7704     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7705     //
7706     // map(to: ps->ps->ps->s.f[:22])
7707     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7708     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7709     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7710     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7711     //
7712     // map(to: s.f[:22]) map(from: s.p[:33])
7713     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7714     //     sizeof(double*) (**), TARGET_PARAM
7715     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7716     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7717     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7718     // (*) allocate contiguous space needed to fit all mapped members even if
7719     //     we allocate space for members not mapped (in this example,
7720     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7721     //     them as well because they fall between &s.f[0] and &s.p)
7722     //
7723     // map(from: s.f[:22]) map(to: ps->p[:33])
7724     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7725     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7726     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7727     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7728     // (*) the struct this entry pertains to is the 2nd element in the list of
7729     //     arguments, hence MEMBER_OF(2)
7730     //
7731     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7732     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7733     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7734     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7735     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7736     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7737     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7738     // (*) the struct this entry pertains to is the 4th element in the list
7739     //     of arguments, hence MEMBER_OF(4)
7740 
7741     // Track if the map information being generated is the first for a capture.
7742     bool IsCaptureFirstInfo = IsFirstComponentList;
7743     // When the variable is on a declare target link or in a to clause with
7744     // unified memory, a reference is needed to hold the host/device address
7745     // of the variable.
7746     bool RequiresReference = false;
7747 
7748     // Scan the components from the base to the complete expression.
7749     auto CI = Components.rbegin();
7750     auto CE = Components.rend();
7751     auto I = CI;
7752 
7753     // Track if the map information being generated is the first for a list of
7754     // components.
7755     bool IsExpressionFirstInfo = true;
7756     bool FirstPointerInComplexData = false;
7757     Address BP = Address::invalid();
7758     const Expr *AssocExpr = I->getAssociatedExpression();
7759     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7760     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7761     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7762 
7763     if (isa<MemberExpr>(AssocExpr)) {
7764       // The base is the 'this' pointer. The content of the pointer is going
7765       // to be the base of the field being mapped.
7766       BP = CGF.LoadCXXThisAddress();
7767     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7768                (OASE &&
7769                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7770       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7771     } else if (OAShE &&
7772                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7773       BP = Address(
7774           CGF.EmitScalarExpr(OAShE->getBase()),
7775           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7776           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7777     } else {
7778       // The base is the reference to the variable.
7779       // BP = &Var.
7780       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7781       if (const auto *VD =
7782               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7783         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7784                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7785           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7786               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7787                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7788             RequiresReference = true;
7789             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7790           }
7791         }
7792       }
7793 
7794       // If the variable is a pointer and is being dereferenced (i.e. is not
7795       // the last component), the base has to be the pointer itself, not its
7796       // reference. References are ignored for mapping purposes.
7797       QualType Ty =
7798           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7799       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7800         // No need to generate individual map information for the pointer, it
7801         // can be associated with the combined storage if shared memory mode is
7802         // active or the base declaration is not global variable.
7803         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7804         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7805             !VD || VD->hasLocalStorage())
7806           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7807         else
7808           FirstPointerInComplexData = true;
7809         ++I;
7810       }
7811     }
7812 
7813     // Track whether a component of the list should be marked as MEMBER_OF some
7814     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7815     // in a component list should be marked as MEMBER_OF, all subsequent entries
7816     // do not belong to the base struct. E.g.
7817     // struct S2 s;
7818     // s.ps->ps->ps->f[:]
7819     //   (1) (2) (3) (4)
7820     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7821     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7822     // is the pointee of ps(2) which is not member of struct s, so it should not
7823     // be marked as such (it is still PTR_AND_OBJ).
7824     // The variable is initialized to false so that PTR_AND_OBJ entries which
7825     // are not struct members are not considered (e.g. array of pointers to
7826     // data).
7827     bool ShouldBeMemberOf = false;
7828 
7829     // Variable keeping track of whether or not we have encountered a component
7830     // in the component list which is a member expression. Useful when we have a
7831     // pointer or a final array section, in which case it is the previous
7832     // component in the list which tells us whether we have a member expression.
7833     // E.g. X.f[:]
7834     // While processing the final array section "[:]" it is "f" which tells us
7835     // whether we are dealing with a member of a declared struct.
7836     const MemberExpr *EncounteredME = nullptr;
7837 
7838     // Track for the total number of dimension. Start from one for the dummy
7839     // dimension.
7840     uint64_t DimSize = 1;
7841 
7842     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7843     bool IsPrevMemberReference = false;
7844 
7845     for (; I != CE; ++I) {
7846       // If the current component is member of a struct (parent struct) mark it.
7847       if (!EncounteredME) {
7848         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7849         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7850         // as MEMBER_OF the parent struct.
7851         if (EncounteredME) {
7852           ShouldBeMemberOf = true;
7853           // Do not emit as complex pointer if this is actually not array-like
7854           // expression.
7855           if (FirstPointerInComplexData) {
7856             QualType Ty = std::prev(I)
7857                               ->getAssociatedDeclaration()
7858                               ->getType()
7859                               .getNonReferenceType();
7860             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7861             FirstPointerInComplexData = false;
7862           }
7863         }
7864       }
7865 
7866       auto Next = std::next(I);
7867 
7868       // We need to generate the addresses and sizes if this is the last
7869       // component, if the component is a pointer or if it is an array section
7870       // whose length can't be proved to be one. If this is a pointer, it
7871       // becomes the base address for the following components.
7872 
7873       // A final array section, is one whose length can't be proved to be one.
7874       // If the map item is non-contiguous then we don't treat any array section
7875       // as final array section.
7876       bool IsFinalArraySection =
7877           !IsNonContiguous &&
7878           isFinalArraySectionExpression(I->getAssociatedExpression());
7879 
7880       // If we have a declaration for the mapping use that, otherwise use
7881       // the base declaration of the map clause.
7882       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7883                                      ? I->getAssociatedDeclaration()
7884                                      : BaseDecl;
7885       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7886                                                : MapExpr;
7887 
7888       // Get information on whether the element is a pointer. Have to do a
7889       // special treatment for array sections given that they are built-in
7890       // types.
7891       const auto *OASE =
7892           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7893       const auto *OAShE =
7894           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7895       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7896       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7897       bool IsPointer =
7898           OAShE ||
7899           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7900                        .getCanonicalType()
7901                        ->isAnyPointerType()) ||
7902           I->getAssociatedExpression()->getType()->isAnyPointerType();
7903       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7904                                MapDecl &&
7905                                MapDecl->getType()->isLValueReferenceType();
7906       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7907 
7908       if (OASE)
7909         ++DimSize;
7910 
7911       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7912           IsFinalArraySection) {
7913         // If this is not the last component, we expect the pointer to be
7914         // associated with an array expression or member expression.
7915         assert((Next == CE ||
7916                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7917                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7918                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7919                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7920                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7921                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7922                "Unexpected expression");
7923 
7924         Address LB = Address::invalid();
7925         Address LowestElem = Address::invalid();
7926         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7927                                        const MemberExpr *E) {
7928           const Expr *BaseExpr = E->getBase();
7929           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7930           // scalar.
7931           LValue BaseLV;
7932           if (E->isArrow()) {
7933             LValueBaseInfo BaseInfo;
7934             TBAAAccessInfo TBAAInfo;
7935             Address Addr =
7936                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7937             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7938             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7939           } else {
7940             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7941           }
7942           return BaseLV;
7943         };
7944         if (OAShE) {
7945           LowestElem = LB =
7946               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7947                       CGF.ConvertTypeForMem(
7948                           OAShE->getBase()->getType()->getPointeeType()),
7949                       CGF.getContext().getTypeAlignInChars(
7950                           OAShE->getBase()->getType()));
7951         } else if (IsMemberReference) {
7952           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7953           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7954           LowestElem = CGF.EmitLValueForFieldInitialization(
7955                               BaseLVal, cast<FieldDecl>(MapDecl))
7956                            .getAddress(CGF);
7957           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7958                    .getAddress(CGF);
7959         } else {
7960           LowestElem = LB =
7961               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7962                   .getAddress(CGF);
7963         }
7964 
7965         // If this component is a pointer inside the base struct then we don't
7966         // need to create any entry for it - it will be combined with the object
7967         // it is pointing to into a single PTR_AND_OBJ entry.
7968         bool IsMemberPointerOrAddr =
7969             EncounteredME &&
7970             (((IsPointer || ForDeviceAddr) &&
7971               I->getAssociatedExpression() == EncounteredME) ||
7972              (IsPrevMemberReference && !IsPointer) ||
7973              (IsMemberReference && Next != CE &&
7974               !Next->getAssociatedExpression()->getType()->isPointerType()));
7975         if (!OverlappedElements.empty() && Next == CE) {
7976           // Handle base element with the info for overlapped elements.
7977           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7978           assert(!IsPointer &&
7979                  "Unexpected base element with the pointer type.");
7980           // Mark the whole struct as the struct that requires allocation on the
7981           // device.
7982           PartialStruct.LowestElem = {0, LowestElem};
7983           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7984               I->getAssociatedExpression()->getType());
7985           Address HB = CGF.Builder.CreateConstGEP(
7986               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7987                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7988               TypeSize.getQuantity() - 1);
7989           PartialStruct.HighestElem = {
7990               std::numeric_limits<decltype(
7991                   PartialStruct.HighestElem.first)>::max(),
7992               HB};
7993           PartialStruct.Base = BP;
7994           PartialStruct.LB = LB;
7995           assert(
7996               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7997               "Overlapped elements must be used only once for the variable.");
7998           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7999           // Emit data for non-overlapped data.
8000           OpenMPOffloadMappingFlags Flags =
8001               OMP_MAP_MEMBER_OF |
8002               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8003                              /*AddPtrFlag=*/false,
8004                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8005           llvm::Value *Size = nullptr;
8006           // Do bitcopy of all non-overlapped structure elements.
8007           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8008                    Component : OverlappedElements) {
8009             Address ComponentLB = Address::invalid();
8010             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8011                  Component) {
8012               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8013                 const auto *FD = dyn_cast<FieldDecl>(VD);
8014                 if (FD && FD->getType()->isLValueReferenceType()) {
8015                   const auto *ME =
8016                       cast<MemberExpr>(MC.getAssociatedExpression());
8017                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8018                   ComponentLB =
8019                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8020                           .getAddress(CGF);
8021                 } else {
8022                   ComponentLB =
8023                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8024                           .getAddress(CGF);
8025                 }
8026                 Size = CGF.Builder.CreatePtrDiff(
8027                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8028                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8029                 break;
8030               }
8031             }
8032             assert(Size && "Failed to determine structure size");
8033             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8034             CombinedInfo.BasePointers.push_back(BP.getPointer());
8035             CombinedInfo.Pointers.push_back(LB.getPointer());
8036             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8037                 Size, CGF.Int64Ty, /*isSigned=*/true));
8038             CombinedInfo.Types.push_back(Flags);
8039             CombinedInfo.Mappers.push_back(nullptr);
8040             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8041                                                                       : 1);
8042             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8043           }
8044           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8045           CombinedInfo.BasePointers.push_back(BP.getPointer());
8046           CombinedInfo.Pointers.push_back(LB.getPointer());
8047           Size = CGF.Builder.CreatePtrDiff(
8048               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8049               CGF.EmitCastToVoidPtr(LB.getPointer()));
8050           CombinedInfo.Sizes.push_back(
8051               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8052           CombinedInfo.Types.push_back(Flags);
8053           CombinedInfo.Mappers.push_back(nullptr);
8054           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8055                                                                     : 1);
8056           break;
8057         }
8058         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8059         if (!IsMemberPointerOrAddr ||
8060             (Next == CE && MapType != OMPC_MAP_unknown)) {
8061           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8062           CombinedInfo.BasePointers.push_back(BP.getPointer());
8063           CombinedInfo.Pointers.push_back(LB.getPointer());
8064           CombinedInfo.Sizes.push_back(
8065               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8066           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8067                                                                     : 1);
8068 
8069           // If Mapper is valid, the last component inherits the mapper.
8070           bool HasMapper = Mapper && Next == CE;
8071           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8072 
8073           // We need to add a pointer flag for each map that comes from the
8074           // same expression except for the first one. We also need to signal
8075           // this map is the first one that relates with the current capture
8076           // (there is a set of entries for each capture).
8077           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8078               MapType, MapModifiers, MotionModifiers, IsImplicit,
8079               !IsExpressionFirstInfo || RequiresReference ||
8080                   FirstPointerInComplexData || IsMemberReference,
8081               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8082 
8083           if (!IsExpressionFirstInfo || IsMemberReference) {
8084             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8085             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8086             if (IsPointer || (IsMemberReference && Next != CE))
8087               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8088                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8089 
8090             if (ShouldBeMemberOf) {
8091               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8092               // should be later updated with the correct value of MEMBER_OF.
8093               Flags |= OMP_MAP_MEMBER_OF;
8094               // From now on, all subsequent PTR_AND_OBJ entries should not be
8095               // marked as MEMBER_OF.
8096               ShouldBeMemberOf = false;
8097             }
8098           }
8099 
8100           CombinedInfo.Types.push_back(Flags);
8101         }
8102 
8103         // If we have encountered a member expression so far, keep track of the
8104         // mapped member. If the parent is "*this", then the value declaration
8105         // is nullptr.
8106         if (EncounteredME) {
8107           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8108           unsigned FieldIndex = FD->getFieldIndex();
8109 
8110           // Update info about the lowest and highest elements for this struct
8111           if (!PartialStruct.Base.isValid()) {
8112             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8113             if (IsFinalArraySection) {
8114               Address HB =
8115                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8116                       .getAddress(CGF);
8117               PartialStruct.HighestElem = {FieldIndex, HB};
8118             } else {
8119               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8120             }
8121             PartialStruct.Base = BP;
8122             PartialStruct.LB = BP;
8123           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8124             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8125           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8126             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8127           }
8128         }
8129 
8130         // Need to emit combined struct for array sections.
8131         if (IsFinalArraySection || IsNonContiguous)
8132           PartialStruct.IsArraySection = true;
8133 
8134         // If we have a final array section, we are done with this expression.
8135         if (IsFinalArraySection)
8136           break;
8137 
8138         // The pointer becomes the base for the next element.
8139         if (Next != CE)
8140           BP = IsMemberReference ? LowestElem : LB;
8141 
8142         IsExpressionFirstInfo = false;
8143         IsCaptureFirstInfo = false;
8144         FirstPointerInComplexData = false;
8145         IsPrevMemberReference = IsMemberReference;
8146       } else if (FirstPointerInComplexData) {
8147         QualType Ty = Components.rbegin()
8148                           ->getAssociatedDeclaration()
8149                           ->getType()
8150                           .getNonReferenceType();
8151         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8152         FirstPointerInComplexData = false;
8153       }
8154     }
8155     // If ran into the whole component - allocate the space for the whole
8156     // record.
8157     if (!EncounteredME)
8158       PartialStruct.HasCompleteRecord = true;
8159 
8160     if (!IsNonContiguous)
8161       return;
8162 
8163     const ASTContext &Context = CGF.getContext();
8164 
8165     // For supporting stride in array section, we need to initialize the first
8166     // dimension size as 1, first offset as 0, and first count as 1
8167     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8168     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8169     MapValuesArrayTy CurStrides;
8170     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8171     uint64_t ElementTypeSize;
8172 
8173     // Collect Size information for each dimension and get the element size as
8174     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8175     // should be [10, 10] and the first stride is 4 btyes.
8176     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8177          Components) {
8178       const Expr *AssocExpr = Component.getAssociatedExpression();
8179       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8180 
8181       if (!OASE)
8182         continue;
8183 
8184       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8185       auto *CAT = Context.getAsConstantArrayType(Ty);
8186       auto *VAT = Context.getAsVariableArrayType(Ty);
8187 
8188       // We need all the dimension size except for the last dimension.
8189       assert((VAT || CAT || &Component == &*Components.begin()) &&
8190              "Should be either ConstantArray or VariableArray if not the "
8191              "first Component");
8192 
8193       // Get element size if CurStrides is empty.
8194       if (CurStrides.empty()) {
8195         const Type *ElementType = nullptr;
8196         if (CAT)
8197           ElementType = CAT->getElementType().getTypePtr();
8198         else if (VAT)
8199           ElementType = VAT->getElementType().getTypePtr();
8200         else
8201           assert(&Component == &*Components.begin() &&
8202                  "Only expect pointer (non CAT or VAT) when this is the "
8203                  "first Component");
8204         // If ElementType is null, then it means the base is a pointer
8205         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8206         // for next iteration.
8207         if (ElementType) {
8208           // For the case that having pointer as base, we need to remove one
8209           // level of indirection.
8210           if (&Component != &*Components.begin())
8211             ElementType = ElementType->getPointeeOrArrayElementType();
8212           ElementTypeSize =
8213               Context.getTypeSizeInChars(ElementType).getQuantity();
8214           CurStrides.push_back(
8215               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8216         }
8217       }
8218       // Get dimension value except for the last dimension since we don't need
8219       // it.
8220       if (DimSizes.size() < Components.size() - 1) {
8221         if (CAT)
8222           DimSizes.push_back(llvm::ConstantInt::get(
8223               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8224         else if (VAT)
8225           DimSizes.push_back(CGF.Builder.CreateIntCast(
8226               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8227               /*IsSigned=*/false));
8228       }
8229     }
8230 
8231     // Skip the dummy dimension since we have already have its information.
8232     auto *DI = DimSizes.begin() + 1;
8233     // Product of dimension.
8234     llvm::Value *DimProd =
8235         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8236 
8237     // Collect info for non-contiguous. Notice that offset, count, and stride
8238     // are only meaningful for array-section, so we insert a null for anything
8239     // other than array-section.
8240     // Also, the size of offset, count, and stride are not the same as
8241     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8242     // count, and stride are the same as the number of non-contiguous
8243     // declaration in target update to/from clause.
8244     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8245          Components) {
8246       const Expr *AssocExpr = Component.getAssociatedExpression();
8247 
8248       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8249         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8250             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8251             /*isSigned=*/false);
8252         CurOffsets.push_back(Offset);
8253         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8254         CurStrides.push_back(CurStrides.back());
8255         continue;
8256       }
8257 
8258       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8259 
8260       if (!OASE)
8261         continue;
8262 
8263       // Offset
8264       const Expr *OffsetExpr = OASE->getLowerBound();
8265       llvm::Value *Offset = nullptr;
8266       if (!OffsetExpr) {
8267         // If offset is absent, then we just set it to zero.
8268         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8269       } else {
8270         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8271                                            CGF.Int64Ty,
8272                                            /*isSigned=*/false);
8273       }
8274       CurOffsets.push_back(Offset);
8275 
8276       // Count
8277       const Expr *CountExpr = OASE->getLength();
8278       llvm::Value *Count = nullptr;
8279       if (!CountExpr) {
8280         // In Clang, once a high dimension is an array section, we construct all
8281         // the lower dimension as array section, however, for case like
8282         // arr[0:2][2], Clang construct the inner dimension as an array section
8283         // but it actually is not in an array section form according to spec.
8284         if (!OASE->getColonLocFirst().isValid() &&
8285             !OASE->getColonLocSecond().isValid()) {
8286           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8287         } else {
8288           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8289           // When the length is absent it defaults to ⌈(size −
8290           // lower-bound)/stride⌉, where size is the size of the array
8291           // dimension.
8292           const Expr *StrideExpr = OASE->getStride();
8293           llvm::Value *Stride =
8294               StrideExpr
8295                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8296                                               CGF.Int64Ty, /*isSigned=*/false)
8297                   : nullptr;
8298           if (Stride)
8299             Count = CGF.Builder.CreateUDiv(
8300                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8301           else
8302             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8303         }
8304       } else {
8305         Count = CGF.EmitScalarExpr(CountExpr);
8306       }
8307       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8308       CurCounts.push_back(Count);
8309 
8310       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8311       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8312       //              Offset      Count     Stride
8313       //    D0          0           1         4    (int)    <- dummy dimension
8314       //    D1          0           2         8    (2 * (1) * 4)
8315       //    D2          1           2         20   (1 * (1 * 5) * 4)
8316       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8317       const Expr *StrideExpr = OASE->getStride();
8318       llvm::Value *Stride =
8319           StrideExpr
8320               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8321                                           CGF.Int64Ty, /*isSigned=*/false)
8322               : nullptr;
8323       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8324       if (Stride)
8325         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8326       else
8327         CurStrides.push_back(DimProd);
8328       if (DI != DimSizes.end())
8329         ++DI;
8330     }
8331 
8332     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8333     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8334     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8335   }
8336 
8337   /// Return the adjusted map modifiers if the declaration a capture refers to
8338   /// appears in a first-private clause. This is expected to be used only with
8339   /// directives that start with 'target'.
8340   MappableExprsHandler::OpenMPOffloadMappingFlags
8341   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8342     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8343 
8344     // A first private variable captured by reference will use only the
8345     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8346     // declaration is known as first-private in this handler.
8347     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8348       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8349         return MappableExprsHandler::OMP_MAP_TO |
8350                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8351       return MappableExprsHandler::OMP_MAP_PRIVATE |
8352              MappableExprsHandler::OMP_MAP_TO;
8353     }
8354     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8355     if (I != LambdasMap.end())
8356       // for map(to: lambda): using user specified map type.
8357       return getMapTypeBits(
8358           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8359           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8360           /*AddPtrFlag=*/false,
8361           /*AddIsTargetParamFlag=*/false,
8362           /*isNonContiguous=*/false);
8363     return MappableExprsHandler::OMP_MAP_TO |
8364            MappableExprsHandler::OMP_MAP_FROM;
8365   }
8366 
8367   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8368     // Rotate by getFlagMemberOffset() bits.
8369     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8370                                                   << getFlagMemberOffset());
8371   }
8372 
8373   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8374                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8375     // If the entry is PTR_AND_OBJ but has not been marked with the special
8376     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8377     // marked as MEMBER_OF.
8378     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8379         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8380       return;
8381 
8382     // Reset the placeholder value to prepare the flag for the assignment of the
8383     // proper MEMBER_OF value.
8384     Flags &= ~OMP_MAP_MEMBER_OF;
8385     Flags |= MemberOfFlag;
8386   }
8387 
8388   void getPlainLayout(const CXXRecordDecl *RD,
8389                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8390                       bool AsBase) const {
8391     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8392 
8393     llvm::StructType *St =
8394         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8395 
8396     unsigned NumElements = St->getNumElements();
8397     llvm::SmallVector<
8398         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8399         RecordLayout(NumElements);
8400 
8401     // Fill bases.
8402     for (const auto &I : RD->bases()) {
8403       if (I.isVirtual())
8404         continue;
8405       const auto *Base = I.getType()->getAsCXXRecordDecl();
8406       // Ignore empty bases.
8407       if (Base->isEmpty() || CGF.getContext()
8408                                  .getASTRecordLayout(Base)
8409                                  .getNonVirtualSize()
8410                                  .isZero())
8411         continue;
8412 
8413       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8414       RecordLayout[FieldIndex] = Base;
8415     }
8416     // Fill in virtual bases.
8417     for (const auto &I : RD->vbases()) {
8418       const auto *Base = I.getType()->getAsCXXRecordDecl();
8419       // Ignore empty bases.
8420       if (Base->isEmpty())
8421         continue;
8422       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8423       if (RecordLayout[FieldIndex])
8424         continue;
8425       RecordLayout[FieldIndex] = Base;
8426     }
8427     // Fill in all the fields.
8428     assert(!RD->isUnion() && "Unexpected union.");
8429     for (const auto *Field : RD->fields()) {
8430       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8431       // will fill in later.)
8432       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8433         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8434         RecordLayout[FieldIndex] = Field;
8435       }
8436     }
8437     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8438              &Data : RecordLayout) {
8439       if (Data.isNull())
8440         continue;
8441       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8442         getPlainLayout(Base, Layout, /*AsBase=*/true);
8443       else
8444         Layout.push_back(Data.get<const FieldDecl *>());
8445     }
8446   }
8447 
8448   /// Generate all the base pointers, section pointers, sizes, map types, and
8449   /// mappers for the extracted mappable expressions (all included in \a
8450   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8451   /// pair of the relevant declaration and index where it occurs is appended to
8452   /// the device pointers info array.
8453   void generateAllInfoForClauses(
8454       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8455       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8456           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8457     // We have to process the component lists that relate with the same
8458     // declaration in a single chunk so that we can generate the map flags
8459     // correctly. Therefore, we organize all lists in a map.
8460     enum MapKind { Present, Allocs, Other, Total };
8461     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8462                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8463         Info;
8464 
8465     // Helper function to fill the information map for the different supported
8466     // clauses.
8467     auto &&InfoGen =
8468         [&Info, &SkipVarSet](
8469             const ValueDecl *D, MapKind Kind,
8470             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8471             OpenMPMapClauseKind MapType,
8472             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8473             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8474             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8475             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8476           if (SkipVarSet.contains(D))
8477             return;
8478           auto It = Info.find(D);
8479           if (It == Info.end())
8480             It = Info
8481                      .insert(std::make_pair(
8482                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8483                      .first;
8484           It->second[Kind].emplace_back(
8485               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8486               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8487         };
8488 
8489     for (const auto *Cl : Clauses) {
8490       const auto *C = dyn_cast<OMPMapClause>(Cl);
8491       if (!C)
8492         continue;
8493       MapKind Kind = Other;
8494       if (llvm::is_contained(C->getMapTypeModifiers(),
8495                              OMPC_MAP_MODIFIER_present))
8496         Kind = Present;
8497       else if (C->getMapType() == OMPC_MAP_alloc)
8498         Kind = Allocs;
8499       const auto *EI = C->getVarRefs().begin();
8500       for (const auto L : C->component_lists()) {
8501         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8502         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8503                 C->getMapTypeModifiers(), llvm::None,
8504                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8505                 E);
8506         ++EI;
8507       }
8508     }
8509     for (const auto *Cl : Clauses) {
8510       const auto *C = dyn_cast<OMPToClause>(Cl);
8511       if (!C)
8512         continue;
8513       MapKind Kind = Other;
8514       if (llvm::is_contained(C->getMotionModifiers(),
8515                              OMPC_MOTION_MODIFIER_present))
8516         Kind = Present;
8517       const auto *EI = C->getVarRefs().begin();
8518       for (const auto L : C->component_lists()) {
8519         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8520                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8521                 C->isImplicit(), std::get<2>(L), *EI);
8522         ++EI;
8523       }
8524     }
8525     for (const auto *Cl : Clauses) {
8526       const auto *C = dyn_cast<OMPFromClause>(Cl);
8527       if (!C)
8528         continue;
8529       MapKind Kind = Other;
8530       if (llvm::is_contained(C->getMotionModifiers(),
8531                              OMPC_MOTION_MODIFIER_present))
8532         Kind = Present;
8533       const auto *EI = C->getVarRefs().begin();
8534       for (const auto L : C->component_lists()) {
8535         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8536                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8537                 C->isImplicit(), std::get<2>(L), *EI);
8538         ++EI;
8539       }
8540     }
8541 
8542     // Look at the use_device_ptr clause information and mark the existing map
8543     // entries as such. If there is no map information for an entry in the
8544     // use_device_ptr list, we create one with map type 'alloc' and zero size
8545     // section. It is the user fault if that was not mapped before. If there is
8546     // no map information and the pointer is a struct member, then we defer the
8547     // emission of that entry until the whole struct has been processed.
8548     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8549                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8550         DeferredInfo;
8551     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8552 
8553     for (const auto *Cl : Clauses) {
8554       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8555       if (!C)
8556         continue;
8557       for (const auto L : C->component_lists()) {
8558         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8559             std::get<1>(L);
8560         assert(!Components.empty() &&
8561                "Not expecting empty list of components!");
8562         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8563         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8564         const Expr *IE = Components.back().getAssociatedExpression();
8565         // If the first component is a member expression, we have to look into
8566         // 'this', which maps to null in the map of map information. Otherwise
8567         // look directly for the information.
8568         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8569 
8570         // We potentially have map information for this declaration already.
8571         // Look for the first set of components that refer to it.
8572         if (It != Info.end()) {
8573           bool Found = false;
8574           for (auto &Data : It->second) {
8575             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8576               return MI.Components.back().getAssociatedDeclaration() == VD;
8577             });
8578             // If we found a map entry, signal that the pointer has to be
8579             // returned and move on to the next declaration. Exclude cases where
8580             // the base pointer is mapped as array subscript, array section or
8581             // array shaping. The base address is passed as a pointer to base in
8582             // this case and cannot be used as a base for use_device_ptr list
8583             // item.
8584             if (CI != Data.end()) {
8585               auto PrevCI = std::next(CI->Components.rbegin());
8586               const auto *VarD = dyn_cast<VarDecl>(VD);
8587               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8588                   isa<MemberExpr>(IE) ||
8589                   !VD->getType().getNonReferenceType()->isPointerType() ||
8590                   PrevCI == CI->Components.rend() ||
8591                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8592                   VarD->hasLocalStorage()) {
8593                 CI->ReturnDevicePointer = true;
8594                 Found = true;
8595                 break;
8596               }
8597             }
8598           }
8599           if (Found)
8600             continue;
8601         }
8602 
8603         // We didn't find any match in our map information - generate a zero
8604         // size array section - if the pointer is a struct member we defer this
8605         // action until the whole struct has been processed.
8606         if (isa<MemberExpr>(IE)) {
8607           // Insert the pointer into Info to be processed by
8608           // generateInfoForComponentList. Because it is a member pointer
8609           // without a pointee, no entry will be generated for it, therefore
8610           // we need to generate one after the whole struct has been processed.
8611           // Nonetheless, generateInfoForComponentList must be called to take
8612           // the pointer into account for the calculation of the range of the
8613           // partial struct.
8614           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8615                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8616                   nullptr);
8617           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8618         } else {
8619           llvm::Value *Ptr =
8620               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8621           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8622           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8623           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8624           UseDevicePtrCombinedInfo.Sizes.push_back(
8625               llvm::Constant::getNullValue(CGF.Int64Ty));
8626           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8627           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8628         }
8629       }
8630     }
8631 
8632     // Look at the use_device_addr clause information and mark the existing map
8633     // entries as such. If there is no map information for an entry in the
8634     // use_device_addr list, we create one with map type 'alloc' and zero size
8635     // section. It is the user fault if that was not mapped before. If there is
8636     // no map information and the pointer is a struct member, then we defer the
8637     // emission of that entry until the whole struct has been processed.
8638     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8639     for (const auto *Cl : Clauses) {
8640       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8641       if (!C)
8642         continue;
8643       for (const auto L : C->component_lists()) {
8644         assert(!std::get<1>(L).empty() &&
8645                "Not expecting empty list of components!");
8646         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8647         if (!Processed.insert(VD).second)
8648           continue;
8649         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8650         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8651         // If the first component is a member expression, we have to look into
8652         // 'this', which maps to null in the map of map information. Otherwise
8653         // look directly for the information.
8654         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8655 
8656         // We potentially have map information for this declaration already.
8657         // Look for the first set of components that refer to it.
8658         if (It != Info.end()) {
8659           bool Found = false;
8660           for (auto &Data : It->second) {
8661             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8662               return MI.Components.back().getAssociatedDeclaration() == VD;
8663             });
8664             // If we found a map entry, signal that the pointer has to be
8665             // returned and move on to the next declaration.
8666             if (CI != Data.end()) {
8667               CI->ReturnDevicePointer = true;
8668               Found = true;
8669               break;
8670             }
8671           }
8672           if (Found)
8673             continue;
8674         }
8675 
8676         // We didn't find any match in our map information - generate a zero
8677         // size array section - if the pointer is a struct member we defer this
8678         // action until the whole struct has been processed.
8679         if (isa<MemberExpr>(IE)) {
8680           // Insert the pointer into Info to be processed by
8681           // generateInfoForComponentList. Because it is a member pointer
8682           // without a pointee, no entry will be generated for it, therefore
8683           // we need to generate one after the whole struct has been processed.
8684           // Nonetheless, generateInfoForComponentList must be called to take
8685           // the pointer into account for the calculation of the range of the
8686           // partial struct.
8687           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8688                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8689                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8690           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8691         } else {
8692           llvm::Value *Ptr;
8693           if (IE->isGLValue())
8694             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8695           else
8696             Ptr = CGF.EmitScalarExpr(IE);
8697           CombinedInfo.Exprs.push_back(VD);
8698           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8699           CombinedInfo.Pointers.push_back(Ptr);
8700           CombinedInfo.Sizes.push_back(
8701               llvm::Constant::getNullValue(CGF.Int64Ty));
8702           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8703           CombinedInfo.Mappers.push_back(nullptr);
8704         }
8705       }
8706     }
8707 
8708     for (const auto &Data : Info) {
8709       StructRangeInfoTy PartialStruct;
8710       // Temporary generated information.
8711       MapCombinedInfoTy CurInfo;
8712       const Decl *D = Data.first;
8713       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8714       for (const auto &M : Data.second) {
8715         for (const MapInfo &L : M) {
8716           assert(!L.Components.empty() &&
8717                  "Not expecting declaration with no component lists.");
8718 
8719           // Remember the current base pointer index.
8720           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8721           CurInfo.NonContigInfo.IsNonContiguous =
8722               L.Components.back().isNonContiguous();
8723           generateInfoForComponentList(
8724               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8725               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8726               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8727 
8728           // If this entry relates with a device pointer, set the relevant
8729           // declaration and add the 'return pointer' flag.
8730           if (L.ReturnDevicePointer) {
8731             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8732                    "Unexpected number of mapped base pointers.");
8733 
8734             const ValueDecl *RelevantVD =
8735                 L.Components.back().getAssociatedDeclaration();
8736             assert(RelevantVD &&
8737                    "No relevant declaration related with device pointer??");
8738 
8739             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8740                 RelevantVD);
8741             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8742           }
8743         }
8744       }
8745 
8746       // Append any pending zero-length pointers which are struct members and
8747       // used with use_device_ptr or use_device_addr.
8748       auto CI = DeferredInfo.find(Data.first);
8749       if (CI != DeferredInfo.end()) {
8750         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8751           llvm::Value *BasePtr;
8752           llvm::Value *Ptr;
8753           if (L.ForDeviceAddr) {
8754             if (L.IE->isGLValue())
8755               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8756             else
8757               Ptr = this->CGF.EmitScalarExpr(L.IE);
8758             BasePtr = Ptr;
8759             // Entry is RETURN_PARAM. Also, set the placeholder value
8760             // MEMBER_OF=FFFF so that the entry is later updated with the
8761             // correct value of MEMBER_OF.
8762             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8763           } else {
8764             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8765             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8766                                              L.IE->getExprLoc());
8767             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8768             // placeholder value MEMBER_OF=FFFF so that the entry is later
8769             // updated with the correct value of MEMBER_OF.
8770             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8771                                     OMP_MAP_MEMBER_OF);
8772           }
8773           CurInfo.Exprs.push_back(L.VD);
8774           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8775           CurInfo.Pointers.push_back(Ptr);
8776           CurInfo.Sizes.push_back(
8777               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8778           CurInfo.Mappers.push_back(nullptr);
8779         }
8780       }
8781       // If there is an entry in PartialStruct it means we have a struct with
8782       // individual members mapped. Emit an extra combined entry.
8783       if (PartialStruct.Base.isValid()) {
8784         CurInfo.NonContigInfo.Dims.push_back(0);
8785         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8786       }
8787 
8788       // We need to append the results of this capture to what we already
8789       // have.
8790       CombinedInfo.append(CurInfo);
8791     }
8792     // Append data for use_device_ptr clauses.
8793     CombinedInfo.append(UseDevicePtrCombinedInfo);
8794   }
8795 
8796 public:
8797   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8798       : CurDir(&Dir), CGF(CGF) {
8799     // Extract firstprivate clause information.
8800     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8801       for (const auto *D : C->varlists())
8802         FirstPrivateDecls.try_emplace(
8803             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8804     // Extract implicit firstprivates from uses_allocators clauses.
8805     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8806       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8807         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8808         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8809           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8810                                         /*Implicit=*/true);
8811         else if (const auto *VD = dyn_cast<VarDecl>(
8812                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8813                          ->getDecl()))
8814           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8815       }
8816     }
8817     // Extract device pointer clause information.
8818     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8819       for (auto L : C->component_lists())
8820         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8821     // Extract map information.
8822     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8823       if (C->getMapType() != OMPC_MAP_to)
8824         continue;
8825       for (auto L : C->component_lists()) {
8826         const ValueDecl *VD = std::get<0>(L);
8827         const auto *RD = VD ? VD->getType()
8828                                   .getCanonicalType()
8829                                   .getNonReferenceType()
8830                                   ->getAsCXXRecordDecl()
8831                             : nullptr;
8832         if (RD && RD->isLambda())
8833           LambdasMap.try_emplace(std::get<0>(L), C);
8834       }
8835     }
8836   }
8837 
8838   /// Constructor for the declare mapper directive.
8839   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8840       : CurDir(&Dir), CGF(CGF) {}
8841 
8842   /// Generate code for the combined entry if we have a partially mapped struct
8843   /// and take care of the mapping flags of the arguments corresponding to
8844   /// individual struct members.
8845   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8846                          MapFlagsArrayTy &CurTypes,
8847                          const StructRangeInfoTy &PartialStruct,
8848                          const ValueDecl *VD = nullptr,
8849                          bool NotTargetParams = true) const {
8850     if (CurTypes.size() == 1 &&
8851         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8852         !PartialStruct.IsArraySection)
8853       return;
8854     Address LBAddr = PartialStruct.LowestElem.second;
8855     Address HBAddr = PartialStruct.HighestElem.second;
8856     if (PartialStruct.HasCompleteRecord) {
8857       LBAddr = PartialStruct.LB;
8858       HBAddr = PartialStruct.LB;
8859     }
8860     CombinedInfo.Exprs.push_back(VD);
8861     // Base is the base of the struct
8862     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8863     // Pointer is the address of the lowest element
8864     llvm::Value *LB = LBAddr.getPointer();
8865     CombinedInfo.Pointers.push_back(LB);
8866     // There should not be a mapper for a combined entry.
8867     CombinedInfo.Mappers.push_back(nullptr);
8868     // Size is (addr of {highest+1} element) - (addr of lowest element)
8869     llvm::Value *HB = HBAddr.getPointer();
8870     llvm::Value *HAddr =
8871         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8872     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8873     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8874     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8875     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8876                                                   /*isSigned=*/false);
8877     CombinedInfo.Sizes.push_back(Size);
8878     // Map type is always TARGET_PARAM, if generate info for captures.
8879     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8880                                                  : OMP_MAP_TARGET_PARAM);
8881     // If any element has the present modifier, then make sure the runtime
8882     // doesn't attempt to allocate the struct.
8883     if (CurTypes.end() !=
8884         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8885           return Type & OMP_MAP_PRESENT;
8886         }))
8887       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8888     // Remove TARGET_PARAM flag from the first element
8889     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8890     // If any element has the ompx_hold modifier, then make sure the runtime
8891     // uses the hold reference count for the struct as a whole so that it won't
8892     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8893     // elements as well so the runtime knows which reference count to check
8894     // when determining whether it's time for device-to-host transfers of
8895     // individual elements.
8896     if (CurTypes.end() !=
8897         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8898           return Type & OMP_MAP_OMPX_HOLD;
8899         })) {
8900       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8901       for (auto &M : CurTypes)
8902         M |= OMP_MAP_OMPX_HOLD;
8903     }
8904 
8905     // All other current entries will be MEMBER_OF the combined entry
8906     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8907     // 0xFFFF in the MEMBER_OF field).
8908     OpenMPOffloadMappingFlags MemberOfFlag =
8909         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8910     for (auto &M : CurTypes)
8911       setCorrectMemberOfFlag(M, MemberOfFlag);
8912   }
8913 
8914   /// Generate all the base pointers, section pointers, sizes, map types, and
8915   /// mappers for the extracted mappable expressions (all included in \a
8916   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8917   /// pair of the relevant declaration and index where it occurs is appended to
8918   /// the device pointers info array.
8919   void generateAllInfo(
8920       MapCombinedInfoTy &CombinedInfo,
8921       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8922           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8923     assert(CurDir.is<const OMPExecutableDirective *>() &&
8924            "Expect a executable directive");
8925     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8926     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8927   }
8928 
8929   /// Generate all the base pointers, section pointers, sizes, map types, and
8930   /// mappers for the extracted map clauses of user-defined mapper (all included
8931   /// in \a CombinedInfo).
8932   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8933     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8934            "Expect a declare mapper directive");
8935     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8936     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8937   }
8938 
8939   /// Emit capture info for lambdas for variables captured by reference.
8940   void generateInfoForLambdaCaptures(
8941       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8942       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8943     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8944     const auto *RD = VDType->getAsCXXRecordDecl();
8945     if (!RD || !RD->isLambda())
8946       return;
8947     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8948                    CGF.getContext().getDeclAlign(VD));
8949     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8950     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8951     FieldDecl *ThisCapture = nullptr;
8952     RD->getCaptureFields(Captures, ThisCapture);
8953     if (ThisCapture) {
8954       LValue ThisLVal =
8955           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8956       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8957       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8958                                  VDLVal.getPointer(CGF));
8959       CombinedInfo.Exprs.push_back(VD);
8960       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8961       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8962       CombinedInfo.Sizes.push_back(
8963           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8964                                     CGF.Int64Ty, /*isSigned=*/true));
8965       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8966                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8967       CombinedInfo.Mappers.push_back(nullptr);
8968     }
8969     for (const LambdaCapture &LC : RD->captures()) {
8970       if (!LC.capturesVariable())
8971         continue;
8972       const VarDecl *VD = LC.getCapturedVar();
8973       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8974         continue;
8975       auto It = Captures.find(VD);
8976       assert(It != Captures.end() && "Found lambda capture without field.");
8977       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8978       if (LC.getCaptureKind() == LCK_ByRef) {
8979         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8980         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8981                                    VDLVal.getPointer(CGF));
8982         CombinedInfo.Exprs.push_back(VD);
8983         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8984         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8985         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8986             CGF.getTypeSize(
8987                 VD->getType().getCanonicalType().getNonReferenceType()),
8988             CGF.Int64Ty, /*isSigned=*/true));
8989       } else {
8990         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8991         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8992                                    VDLVal.getPointer(CGF));
8993         CombinedInfo.Exprs.push_back(VD);
8994         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8995         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8996         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8997       }
8998       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8999                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9000       CombinedInfo.Mappers.push_back(nullptr);
9001     }
9002   }
9003 
9004   /// Set correct indices for lambdas captures.
9005   void adjustMemberOfForLambdaCaptures(
9006       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9007       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9008       MapFlagsArrayTy &Types) const {
9009     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9010       // Set correct member_of idx for all implicit lambda captures.
9011       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9012                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9013         continue;
9014       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9015       assert(BasePtr && "Unable to find base lambda address.");
9016       int TgtIdx = -1;
9017       for (unsigned J = I; J > 0; --J) {
9018         unsigned Idx = J - 1;
9019         if (Pointers[Idx] != BasePtr)
9020           continue;
9021         TgtIdx = Idx;
9022         break;
9023       }
9024       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9025       // All other current entries will be MEMBER_OF the combined entry
9026       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9027       // 0xFFFF in the MEMBER_OF field).
9028       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9029       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9030     }
9031   }
9032 
9033   /// Generate the base pointers, section pointers, sizes, map types, and
9034   /// mappers associated to a given capture (all included in \a CombinedInfo).
9035   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9036                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9037                               StructRangeInfoTy &PartialStruct) const {
9038     assert(!Cap->capturesVariableArrayType() &&
9039            "Not expecting to generate map info for a variable array type!");
9040 
9041     // We need to know when we generating information for the first component
9042     const ValueDecl *VD = Cap->capturesThis()
9043                               ? nullptr
9044                               : Cap->getCapturedVar()->getCanonicalDecl();
9045 
9046     // for map(to: lambda): skip here, processing it in
9047     // generateDefaultMapInfo
9048     if (LambdasMap.count(VD))
9049       return;
9050 
9051     // If this declaration appears in a is_device_ptr clause we just have to
9052     // pass the pointer by value. If it is a reference to a declaration, we just
9053     // pass its value.
9054     if (DevPointersMap.count(VD)) {
9055       CombinedInfo.Exprs.push_back(VD);
9056       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9057       CombinedInfo.Pointers.push_back(Arg);
9058       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9059           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9060           /*isSigned=*/true));
9061       CombinedInfo.Types.push_back(
9062           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9063           OMP_MAP_TARGET_PARAM);
9064       CombinedInfo.Mappers.push_back(nullptr);
9065       return;
9066     }
9067 
9068     using MapData =
9069         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9070                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9071                    const ValueDecl *, const Expr *>;
9072     SmallVector<MapData, 4> DeclComponentLists;
9073     assert(CurDir.is<const OMPExecutableDirective *>() &&
9074            "Expect a executable directive");
9075     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9076     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9077       const auto *EI = C->getVarRefs().begin();
9078       for (const auto L : C->decl_component_lists(VD)) {
9079         const ValueDecl *VDecl, *Mapper;
9080         // The Expression is not correct if the mapping is implicit
9081         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9082         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9083         std::tie(VDecl, Components, Mapper) = L;
9084         assert(VDecl == VD && "We got information for the wrong declaration??");
9085         assert(!Components.empty() &&
9086                "Not expecting declaration with no component lists.");
9087         DeclComponentLists.emplace_back(Components, C->getMapType(),
9088                                         C->getMapTypeModifiers(),
9089                                         C->isImplicit(), Mapper, E);
9090         ++EI;
9091       }
9092     }
9093     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9094                                              const MapData &RHS) {
9095       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9096       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9097       bool HasPresent =
9098           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9099       bool HasAllocs = MapType == OMPC_MAP_alloc;
9100       MapModifiers = std::get<2>(RHS);
9101       MapType = std::get<1>(LHS);
9102       bool HasPresentR =
9103           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9104       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9105       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9106     });
9107 
9108     // Find overlapping elements (including the offset from the base element).
9109     llvm::SmallDenseMap<
9110         const MapData *,
9111         llvm::SmallVector<
9112             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9113         4>
9114         OverlappedData;
9115     size_t Count = 0;
9116     for (const MapData &L : DeclComponentLists) {
9117       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9118       OpenMPMapClauseKind MapType;
9119       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9120       bool IsImplicit;
9121       const ValueDecl *Mapper;
9122       const Expr *VarRef;
9123       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9124           L;
9125       ++Count;
9126       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9127         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9128         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9129                  VarRef) = L1;
9130         auto CI = Components.rbegin();
9131         auto CE = Components.rend();
9132         auto SI = Components1.rbegin();
9133         auto SE = Components1.rend();
9134         for (; CI != CE && SI != SE; ++CI, ++SI) {
9135           if (CI->getAssociatedExpression()->getStmtClass() !=
9136               SI->getAssociatedExpression()->getStmtClass())
9137             break;
9138           // Are we dealing with different variables/fields?
9139           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9140             break;
9141         }
9142         // Found overlapping if, at least for one component, reached the head
9143         // of the components list.
9144         if (CI == CE || SI == SE) {
9145           // Ignore it if it is the same component.
9146           if (CI == CE && SI == SE)
9147             continue;
9148           const auto It = (SI == SE) ? CI : SI;
9149           // If one component is a pointer and another one is a kind of
9150           // dereference of this pointer (array subscript, section, dereference,
9151           // etc.), it is not an overlapping.
9152           // Same, if one component is a base and another component is a
9153           // dereferenced pointer memberexpr with the same base.
9154           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9155               (std::prev(It)->getAssociatedDeclaration() &&
9156                std::prev(It)
9157                    ->getAssociatedDeclaration()
9158                    ->getType()
9159                    ->isPointerType()) ||
9160               (It->getAssociatedDeclaration() &&
9161                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9162                std::next(It) != CE && std::next(It) != SE))
9163             continue;
9164           const MapData &BaseData = CI == CE ? L : L1;
9165           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9166               SI == SE ? Components : Components1;
9167           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9168           OverlappedElements.getSecond().push_back(SubData);
9169         }
9170       }
9171     }
9172     // Sort the overlapped elements for each item.
9173     llvm::SmallVector<const FieldDecl *, 4> Layout;
9174     if (!OverlappedData.empty()) {
9175       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9176       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9177       while (BaseType != OrigType) {
9178         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9179         OrigType = BaseType->getPointeeOrArrayElementType();
9180       }
9181 
9182       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9183         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9184       else {
9185         const auto *RD = BaseType->getAsRecordDecl();
9186         Layout.append(RD->field_begin(), RD->field_end());
9187       }
9188     }
9189     for (auto &Pair : OverlappedData) {
9190       llvm::stable_sort(
9191           Pair.getSecond(),
9192           [&Layout](
9193               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9194               OMPClauseMappableExprCommon::MappableExprComponentListRef
9195                   Second) {
9196             auto CI = First.rbegin();
9197             auto CE = First.rend();
9198             auto SI = Second.rbegin();
9199             auto SE = Second.rend();
9200             for (; CI != CE && SI != SE; ++CI, ++SI) {
9201               if (CI->getAssociatedExpression()->getStmtClass() !=
9202                   SI->getAssociatedExpression()->getStmtClass())
9203                 break;
9204               // Are we dealing with different variables/fields?
9205               if (CI->getAssociatedDeclaration() !=
9206                   SI->getAssociatedDeclaration())
9207                 break;
9208             }
9209 
9210             // Lists contain the same elements.
9211             if (CI == CE && SI == SE)
9212               return false;
9213 
9214             // List with less elements is less than list with more elements.
9215             if (CI == CE || SI == SE)
9216               return CI == CE;
9217 
9218             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9219             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9220             if (FD1->getParent() == FD2->getParent())
9221               return FD1->getFieldIndex() < FD2->getFieldIndex();
9222             const auto *It =
9223                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9224                   return FD == FD1 || FD == FD2;
9225                 });
9226             return *It == FD1;
9227           });
9228     }
9229 
9230     // Associated with a capture, because the mapping flags depend on it.
9231     // Go through all of the elements with the overlapped elements.
9232     bool IsFirstComponentList = true;
9233     for (const auto &Pair : OverlappedData) {
9234       const MapData &L = *Pair.getFirst();
9235       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9236       OpenMPMapClauseKind MapType;
9237       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9238       bool IsImplicit;
9239       const ValueDecl *Mapper;
9240       const Expr *VarRef;
9241       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9242           L;
9243       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9244           OverlappedComponents = Pair.getSecond();
9245       generateInfoForComponentList(
9246           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9247           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9248           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9249       IsFirstComponentList = false;
9250     }
9251     // Go through other elements without overlapped elements.
9252     for (const MapData &L : DeclComponentLists) {
9253       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9254       OpenMPMapClauseKind MapType;
9255       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9256       bool IsImplicit;
9257       const ValueDecl *Mapper;
9258       const Expr *VarRef;
9259       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9260           L;
9261       auto It = OverlappedData.find(&L);
9262       if (It == OverlappedData.end())
9263         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9264                                      Components, CombinedInfo, PartialStruct,
9265                                      IsFirstComponentList, IsImplicit, Mapper,
9266                                      /*ForDeviceAddr=*/false, VD, VarRef);
9267       IsFirstComponentList = false;
9268     }
9269   }
9270 
9271   /// Generate the default map information for a given capture \a CI,
9272   /// record field declaration \a RI and captured value \a CV.
9273   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9274                               const FieldDecl &RI, llvm::Value *CV,
9275                               MapCombinedInfoTy &CombinedInfo) const {
9276     bool IsImplicit = true;
9277     // Do the default mapping.
9278     if (CI.capturesThis()) {
9279       CombinedInfo.Exprs.push_back(nullptr);
9280       CombinedInfo.BasePointers.push_back(CV);
9281       CombinedInfo.Pointers.push_back(CV);
9282       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9283       CombinedInfo.Sizes.push_back(
9284           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9285                                     CGF.Int64Ty, /*isSigned=*/true));
9286       // Default map type.
9287       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9288     } else if (CI.capturesVariableByCopy()) {
9289       const VarDecl *VD = CI.getCapturedVar();
9290       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9291       CombinedInfo.BasePointers.push_back(CV);
9292       CombinedInfo.Pointers.push_back(CV);
9293       if (!RI.getType()->isAnyPointerType()) {
9294         // We have to signal to the runtime captures passed by value that are
9295         // not pointers.
9296         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9297         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9298             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9299       } else {
9300         // Pointers are implicitly mapped with a zero size and no flags
9301         // (other than first map that is added for all implicit maps).
9302         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9303         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9304       }
9305       auto I = FirstPrivateDecls.find(VD);
9306       if (I != FirstPrivateDecls.end())
9307         IsImplicit = I->getSecond();
9308     } else {
9309       assert(CI.capturesVariable() && "Expected captured reference.");
9310       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9311       QualType ElementType = PtrTy->getPointeeType();
9312       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9313           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9314       // The default map type for a scalar/complex type is 'to' because by
9315       // default the value doesn't have to be retrieved. For an aggregate
9316       // type, the default is 'tofrom'.
9317       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9318       const VarDecl *VD = CI.getCapturedVar();
9319       auto I = FirstPrivateDecls.find(VD);
9320       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9321       CombinedInfo.BasePointers.push_back(CV);
9322       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9323         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9324             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9325             AlignmentSource::Decl));
9326         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9327       } else {
9328         CombinedInfo.Pointers.push_back(CV);
9329       }
9330       if (I != FirstPrivateDecls.end())
9331         IsImplicit = I->getSecond();
9332     }
9333     // Every default map produces a single argument which is a target parameter.
9334     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9335 
9336     // Add flag stating this is an implicit map.
9337     if (IsImplicit)
9338       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9339 
9340     // No user-defined mapper for default mapping.
9341     CombinedInfo.Mappers.push_back(nullptr);
9342   }
9343 };
9344 } // anonymous namespace
9345 
9346 static void emitNonContiguousDescriptor(
9347     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9348     CGOpenMPRuntime::TargetDataInfo &Info) {
9349   CodeGenModule &CGM = CGF.CGM;
9350   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9351       &NonContigInfo = CombinedInfo.NonContigInfo;
9352 
9353   // Build an array of struct descriptor_dim and then assign it to
9354   // offload_args.
9355   //
9356   // struct descriptor_dim {
9357   //  uint64_t offset;
9358   //  uint64_t count;
9359   //  uint64_t stride
9360   // };
9361   ASTContext &C = CGF.getContext();
9362   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9363   RecordDecl *RD;
9364   RD = C.buildImplicitRecord("descriptor_dim");
9365   RD->startDefinition();
9366   addFieldToRecordDecl(C, RD, Int64Ty);
9367   addFieldToRecordDecl(C, RD, Int64Ty);
9368   addFieldToRecordDecl(C, RD, Int64Ty);
9369   RD->completeDefinition();
9370   QualType DimTy = C.getRecordType(RD);
9371 
9372   enum { OffsetFD = 0, CountFD, StrideFD };
9373   // We need two index variable here since the size of "Dims" is the same as the
9374   // size of Components, however, the size of offset, count, and stride is equal
9375   // to the size of base declaration that is non-contiguous.
9376   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9377     // Skip emitting ir if dimension size is 1 since it cannot be
9378     // non-contiguous.
9379     if (NonContigInfo.Dims[I] == 1)
9380       continue;
9381     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9382     QualType ArrayTy =
9383         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9384     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9385     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9386       unsigned RevIdx = EE - II - 1;
9387       LValue DimsLVal = CGF.MakeAddrLValue(
9388           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9389       // Offset
9390       LValue OffsetLVal = CGF.EmitLValueForField(
9391           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9392       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9393       // Count
9394       LValue CountLVal = CGF.EmitLValueForField(
9395           DimsLVal, *std::next(RD->field_begin(), CountFD));
9396       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9397       // Stride
9398       LValue StrideLVal = CGF.EmitLValueForField(
9399           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9400       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9401     }
9402     // args[I] = &dims
9403     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9404         DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9405     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9406         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9407         Info.PointersArray, 0, I);
9408     Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9409     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9410     ++L;
9411   }
9412 }
9413 
9414 // Try to extract the base declaration from a `this->x` expression if possible.
9415 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9416   if (!E)
9417     return nullptr;
9418 
9419   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9420     if (const MemberExpr *ME =
9421             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9422       return ME->getMemberDecl();
9423   return nullptr;
9424 }
9425 
9426 /// Emit a string constant containing the names of the values mapped to the
9427 /// offloading runtime library.
9428 llvm::Constant *
9429 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9430                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9431 
9432   uint32_t SrcLocStrSize;
9433   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9434     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9435 
9436   SourceLocation Loc;
9437   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9438     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9439       Loc = VD->getLocation();
9440     else
9441       Loc = MapExprs.getMapExpr()->getExprLoc();
9442   } else {
9443     Loc = MapExprs.getMapDecl()->getLocation();
9444   }
9445 
9446   std::string ExprName;
9447   if (MapExprs.getMapExpr()) {
9448     PrintingPolicy P(CGF.getContext().getLangOpts());
9449     llvm::raw_string_ostream OS(ExprName);
9450     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9451     OS.flush();
9452   } else {
9453     ExprName = MapExprs.getMapDecl()->getNameAsString();
9454   }
9455 
9456   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9457   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9458                                          PLoc.getLine(), PLoc.getColumn(),
9459                                          SrcLocStrSize);
9460 }
9461 
9462 /// Emit the arrays used to pass the captures and map information to the
9463 /// offloading runtime library. If there is no map or capture information,
9464 /// return nullptr by reference.
9465 static void emitOffloadingArrays(
9466     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9467     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9468     bool IsNonContiguous = false) {
9469   CodeGenModule &CGM = CGF.CGM;
9470   ASTContext &Ctx = CGF.getContext();
9471 
9472   // Reset the array information.
9473   Info.clearArrayInfo();
9474   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9475 
9476   if (Info.NumberOfPtrs) {
9477     // Detect if we have any capture size requiring runtime evaluation of the
9478     // size so that a constant array could be eventually used.
9479 
9480     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9481     QualType PointerArrayType = Ctx.getConstantArrayType(
9482         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9483         /*IndexTypeQuals=*/0);
9484 
9485     Info.BasePointersArray =
9486         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9487     Info.PointersArray =
9488         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9489     Address MappersArray =
9490         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9491     Info.MappersArray = MappersArray.getPointer();
9492 
9493     // If we don't have any VLA types or other types that require runtime
9494     // evaluation, we can use a constant array for the map sizes, otherwise we
9495     // need to fill up the arrays as we do for the pointers.
9496     QualType Int64Ty =
9497         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9498     SmallVector<llvm::Constant *> ConstSizes(
9499         CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
9500     llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
9501     for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9502       if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
9503         if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
9504           if (IsNonContiguous && (CombinedInfo.Types[I] &
9505                                   MappableExprsHandler::OMP_MAP_NON_CONTIG))
9506             ConstSizes[I] = llvm::ConstantInt::get(
9507                 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
9508           else
9509             ConstSizes[I] = CI;
9510           continue;
9511         }
9512       }
9513       RuntimeSizes.set(I);
9514     }
9515 
9516     if (RuntimeSizes.all()) {
9517       QualType SizeArrayType = Ctx.getConstantArrayType(
9518           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9519           /*IndexTypeQuals=*/0);
9520       Info.SizesArray =
9521           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9522     } else {
9523       auto *SizesArrayInit = llvm::ConstantArray::get(
9524           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9525       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9526       auto *SizesArrayGbl = new llvm::GlobalVariable(
9527           CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
9528           llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
9529       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9530       if (RuntimeSizes.any()) {
9531         QualType SizeArrayType = Ctx.getConstantArrayType(
9532             Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9533             /*IndexTypeQuals=*/0);
9534         Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
9535         llvm::Value *GblConstPtr =
9536             CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9537                 SizesArrayGbl, CGM.Int64Ty->getPointerTo());
9538         CGF.Builder.CreateMemCpy(
9539             Buffer,
9540             Address(GblConstPtr, CGM.Int64Ty,
9541                     CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
9542                         /*DestWidth=*/64, /*Signed=*/false))),
9543             CGF.getTypeSize(SizeArrayType));
9544         Info.SizesArray = Buffer.getPointer();
9545       } else {
9546         Info.SizesArray = SizesArrayGbl;
9547       }
9548     }
9549 
9550     // The map types are always constant so we don't need to generate code to
9551     // fill arrays. Instead, we create an array constant.
9552     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9553     llvm::copy(CombinedInfo.Types, Mapping.begin());
9554     std::string MaptypesName =
9555         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9556     auto *MapTypesArrayGbl =
9557         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9558     Info.MapTypesArray = MapTypesArrayGbl;
9559 
9560     // The information types are only built if there is debug information
9561     // requested.
9562     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9563       Info.MapNamesArray = llvm::Constant::getNullValue(
9564           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9565     } else {
9566       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9567         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9568       };
9569       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9570       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9571       std::string MapnamesName =
9572           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9573       auto *MapNamesArrayGbl =
9574           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9575       Info.MapNamesArray = MapNamesArrayGbl;
9576     }
9577 
9578     // If there's a present map type modifier, it must not be applied to the end
9579     // of a region, so generate a separate map type array in that case.
9580     if (Info.separateBeginEndCalls()) {
9581       bool EndMapTypesDiffer = false;
9582       for (uint64_t &Type : Mapping) {
9583         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9584           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9585           EndMapTypesDiffer = true;
9586         }
9587       }
9588       if (EndMapTypesDiffer) {
9589         MapTypesArrayGbl =
9590             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9591         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9592       }
9593     }
9594 
9595     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9596       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9597       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9598           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9599           Info.BasePointersArray, 0, I);
9600       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9601           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9602       Address BPAddr(BP, BPVal->getType(),
9603                      Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9604       CGF.Builder.CreateStore(BPVal, BPAddr);
9605 
9606       if (Info.requiresDevicePointerInfo())
9607         if (const ValueDecl *DevVD =
9608                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9609           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9610 
9611       llvm::Value *PVal = CombinedInfo.Pointers[I];
9612       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9613           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9614           Info.PointersArray, 0, I);
9615       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9616           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9617       Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9618       CGF.Builder.CreateStore(PVal, PAddr);
9619 
9620       if (RuntimeSizes.test(I)) {
9621         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9622             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9623             Info.SizesArray,
9624             /*Idx0=*/0,
9625             /*Idx1=*/I);
9626         Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
9627         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9628                                                           CGM.Int64Ty,
9629                                                           /*isSigned=*/true),
9630                                 SAddr);
9631       }
9632 
9633       // Fill up the mapper array.
9634       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9635       if (CombinedInfo.Mappers[I]) {
9636         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9637             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9638         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9639         Info.HasMapper = true;
9640       }
9641       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9642       CGF.Builder.CreateStore(MFunc, MAddr);
9643     }
9644   }
9645 
9646   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9647       Info.NumberOfPtrs == 0)
9648     return;
9649 
9650   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9651 }
9652 
9653 namespace {
9654 /// Additional arguments for emitOffloadingArraysArgument function.
9655 struct ArgumentsOptions {
9656   bool ForEndCall = false;
9657   ArgumentsOptions() = default;
9658   ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
9659 };
9660 } // namespace
9661 
9662 /// Emit the arguments to be passed to the runtime library based on the
9663 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9664 /// ForEndCall, emit map types to be passed for the end of the region instead of
9665 /// the beginning.
9666 static void emitOffloadingArraysArgument(
9667     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9668     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9669     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9670     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9671     const ArgumentsOptions &Options = ArgumentsOptions()) {
9672   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9673          "expected region end call to runtime only when end call is separate");
9674   CodeGenModule &CGM = CGF.CGM;
9675   if (Info.NumberOfPtrs) {
9676     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9677         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9678         Info.BasePointersArray,
9679         /*Idx0=*/0, /*Idx1=*/0);
9680     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9681         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9682         Info.PointersArray,
9683         /*Idx0=*/0,
9684         /*Idx1=*/0);
9685     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9686         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9687         /*Idx0=*/0, /*Idx1=*/0);
9688     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9689         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9690         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9691                                                     : Info.MapTypesArray,
9692         /*Idx0=*/0,
9693         /*Idx1=*/0);
9694 
9695     // Only emit the mapper information arrays if debug information is
9696     // requested.
9697     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9698       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9699     else
9700       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9701           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9702           Info.MapNamesArray,
9703           /*Idx0=*/0,
9704           /*Idx1=*/0);
9705     // If there is no user-defined mapper, set the mapper array to nullptr to
9706     // avoid an unnecessary data privatization
9707     if (!Info.HasMapper)
9708       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9709     else
9710       MappersArrayArg =
9711           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9712   } else {
9713     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9714     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9715     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9716     MapTypesArrayArg =
9717         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9718     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9719     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9720   }
9721 }
9722 
9723 /// Check for inner distribute directive.
9724 static const OMPExecutableDirective *
9725 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9726   const auto *CS = D.getInnermostCapturedStmt();
9727   const auto *Body =
9728       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9729   const Stmt *ChildStmt =
9730       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9731 
9732   if (const auto *NestedDir =
9733           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9734     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9735     switch (D.getDirectiveKind()) {
9736     case OMPD_target:
9737       if (isOpenMPDistributeDirective(DKind))
9738         return NestedDir;
9739       if (DKind == OMPD_teams) {
9740         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9741             /*IgnoreCaptured=*/true);
9742         if (!Body)
9743           return nullptr;
9744         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9745         if (const auto *NND =
9746                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9747           DKind = NND->getDirectiveKind();
9748           if (isOpenMPDistributeDirective(DKind))
9749             return NND;
9750         }
9751       }
9752       return nullptr;
9753     case OMPD_target_teams:
9754       if (isOpenMPDistributeDirective(DKind))
9755         return NestedDir;
9756       return nullptr;
9757     case OMPD_target_parallel:
9758     case OMPD_target_simd:
9759     case OMPD_target_parallel_for:
9760     case OMPD_target_parallel_for_simd:
9761       return nullptr;
9762     case OMPD_target_teams_distribute:
9763     case OMPD_target_teams_distribute_simd:
9764     case OMPD_target_teams_distribute_parallel_for:
9765     case OMPD_target_teams_distribute_parallel_for_simd:
9766     case OMPD_parallel:
9767     case OMPD_for:
9768     case OMPD_parallel_for:
9769     case OMPD_parallel_master:
9770     case OMPD_parallel_sections:
9771     case OMPD_for_simd:
9772     case OMPD_parallel_for_simd:
9773     case OMPD_cancel:
9774     case OMPD_cancellation_point:
9775     case OMPD_ordered:
9776     case OMPD_threadprivate:
9777     case OMPD_allocate:
9778     case OMPD_task:
9779     case OMPD_simd:
9780     case OMPD_tile:
9781     case OMPD_unroll:
9782     case OMPD_sections:
9783     case OMPD_section:
9784     case OMPD_single:
9785     case OMPD_master:
9786     case OMPD_critical:
9787     case OMPD_taskyield:
9788     case OMPD_barrier:
9789     case OMPD_taskwait:
9790     case OMPD_taskgroup:
9791     case OMPD_atomic:
9792     case OMPD_flush:
9793     case OMPD_depobj:
9794     case OMPD_scan:
9795     case OMPD_teams:
9796     case OMPD_target_data:
9797     case OMPD_target_exit_data:
9798     case OMPD_target_enter_data:
9799     case OMPD_distribute:
9800     case OMPD_distribute_simd:
9801     case OMPD_distribute_parallel_for:
9802     case OMPD_distribute_parallel_for_simd:
9803     case OMPD_teams_distribute:
9804     case OMPD_teams_distribute_simd:
9805     case OMPD_teams_distribute_parallel_for:
9806     case OMPD_teams_distribute_parallel_for_simd:
9807     case OMPD_target_update:
9808     case OMPD_declare_simd:
9809     case OMPD_declare_variant:
9810     case OMPD_begin_declare_variant:
9811     case OMPD_end_declare_variant:
9812     case OMPD_declare_target:
9813     case OMPD_end_declare_target:
9814     case OMPD_declare_reduction:
9815     case OMPD_declare_mapper:
9816     case OMPD_taskloop:
9817     case OMPD_taskloop_simd:
9818     case OMPD_master_taskloop:
9819     case OMPD_master_taskloop_simd:
9820     case OMPD_parallel_master_taskloop:
9821     case OMPD_parallel_master_taskloop_simd:
9822     case OMPD_requires:
9823     case OMPD_metadirective:
9824     case OMPD_unknown:
9825     default:
9826       llvm_unreachable("Unexpected directive.");
9827     }
9828   }
9829 
9830   return nullptr;
9831 }
9832 
9833 /// Emit the user-defined mapper function. The code generation follows the
9834 /// pattern in the example below.
9835 /// \code
9836 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9837 ///                                           void *base, void *begin,
9838 ///                                           int64_t size, int64_t type,
9839 ///                                           void *name = nullptr) {
9840 ///   // Allocate space for an array section first or add a base/begin for
9841 ///   // pointer dereference.
9842 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9843 ///       !maptype.IsDelete)
9844 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9845 ///                                 size*sizeof(Ty), clearToFromMember(type));
9846 ///   // Map members.
9847 ///   for (unsigned i = 0; i < size; i++) {
9848 ///     // For each component specified by this mapper:
9849 ///     for (auto c : begin[i]->all_components) {
9850 ///       if (c.hasMapper())
9851 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9852 ///                       c.arg_type, c.arg_name);
9853 ///       else
9854 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9855 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9856 ///                                     c.arg_name);
9857 ///     }
9858 ///   }
9859 ///   // Delete the array section.
9860 ///   if (size > 1 && maptype.IsDelete)
9861 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9862 ///                                 size*sizeof(Ty), clearToFromMember(type));
9863 /// }
9864 /// \endcode
9865 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9866                                             CodeGenFunction *CGF) {
9867   if (UDMMap.count(D) > 0)
9868     return;
9869   ASTContext &C = CGM.getContext();
9870   QualType Ty = D->getType();
9871   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9872   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9873   auto *MapperVarDecl =
9874       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9875   SourceLocation Loc = D->getLocation();
9876   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9877   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9878 
9879   // Prepare mapper function arguments and attributes.
9880   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9881                               C.VoidPtrTy, ImplicitParamDecl::Other);
9882   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9883                             ImplicitParamDecl::Other);
9884   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9885                              C.VoidPtrTy, ImplicitParamDecl::Other);
9886   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9887                             ImplicitParamDecl::Other);
9888   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9889                             ImplicitParamDecl::Other);
9890   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9891                             ImplicitParamDecl::Other);
9892   FunctionArgList Args;
9893   Args.push_back(&HandleArg);
9894   Args.push_back(&BaseArg);
9895   Args.push_back(&BeginArg);
9896   Args.push_back(&SizeArg);
9897   Args.push_back(&TypeArg);
9898   Args.push_back(&NameArg);
9899   const CGFunctionInfo &FnInfo =
9900       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9901   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9902   SmallString<64> TyStr;
9903   llvm::raw_svector_ostream Out(TyStr);
9904   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9905   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9906   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9907                                     Name, &CGM.getModule());
9908   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9909   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9910   // Start the mapper function code generation.
9911   CodeGenFunction MapperCGF(CGM);
9912   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9913   // Compute the starting and end addresses of array elements.
9914   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9915       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9916       C.getPointerType(Int64Ty), Loc);
9917   // Prepare common arguments for array initiation and deletion.
9918   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9919       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9920       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9921   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9922       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9923       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9924   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9925       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9926       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9927   // Convert the size in bytes into the number of array elements.
9928   Size = MapperCGF.Builder.CreateExactUDiv(
9929       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9930   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9931       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9932   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9933   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9934       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9935       C.getPointerType(Int64Ty), Loc);
9936   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9937       MapperCGF.GetAddrOfLocalVar(&NameArg),
9938       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9939 
9940   // Emit array initiation if this is an array section and \p MapType indicates
9941   // that memory allocation is required.
9942   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9943   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9944                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9945 
9946   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9947 
9948   // Emit the loop header block.
9949   MapperCGF.EmitBlock(HeadBB);
9950   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9951   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9952   // Evaluate whether the initial condition is satisfied.
9953   llvm::Value *IsEmpty =
9954       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9955   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9956   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9957 
9958   // Emit the loop body block.
9959   MapperCGF.EmitBlock(BodyBB);
9960   llvm::BasicBlock *LastBB = BodyBB;
9961   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9962       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9963   PtrPHI->addIncoming(PtrBegin, EntryBB);
9964   Address PtrCurrent(PtrPHI, ElemTy,
9965                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
9966                          .getAlignment()
9967                          .alignmentOfArrayElement(ElementSize));
9968   // Privatize the declared variable of mapper to be the current array element.
9969   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9970   Scope.addPrivate(MapperVarDecl, PtrCurrent);
9971   (void)Scope.Privatize();
9972 
9973   // Get map clause information. Fill up the arrays with all mapped variables.
9974   MappableExprsHandler::MapCombinedInfoTy Info;
9975   MappableExprsHandler MEHandler(*D, MapperCGF);
9976   MEHandler.generateAllInfoForMapper(Info);
9977 
9978   // Call the runtime API __tgt_mapper_num_components to get the number of
9979   // pre-existing components.
9980   llvm::Value *OffloadingArgs[] = {Handle};
9981   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9982       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9983                                             OMPRTL___tgt_mapper_num_components),
9984       OffloadingArgs);
9985   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9986       PreviousSize,
9987       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9988 
9989   // Fill up the runtime mapper handle for all components.
9990   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9991     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9992         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9993     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9994         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9995     llvm::Value *CurSizeArg = Info.Sizes[I];
9996     llvm::Value *CurNameArg =
9997         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9998             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9999             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10000 
10001     // Extract the MEMBER_OF field from the map type.
10002     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10003     llvm::Value *MemberMapType =
10004         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10005 
10006     // Combine the map type inherited from user-defined mapper with that
10007     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10008     // bits of the \a MapType, which is the input argument of the mapper
10009     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10010     // bits of MemberMapType.
10011     // [OpenMP 5.0], 1.2.6. map-type decay.
10012     //        | alloc |  to   | from  | tofrom | release | delete
10013     // ----------------------------------------------------------
10014     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10015     // to     | alloc |  to   | alloc |   to   | release | delete
10016     // from   | alloc | alloc | from  |  from  | release | delete
10017     // tofrom | alloc |  to   | from  | tofrom | release | delete
10018     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10019         MapType,
10020         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10021                                    MappableExprsHandler::OMP_MAP_FROM));
10022     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10023     llvm::BasicBlock *AllocElseBB =
10024         MapperCGF.createBasicBlock("omp.type.alloc.else");
10025     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10026     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10027     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10028     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10029     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10030     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10031     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10032     MapperCGF.EmitBlock(AllocBB);
10033     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10034         MemberMapType,
10035         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10036                                      MappableExprsHandler::OMP_MAP_FROM)));
10037     MapperCGF.Builder.CreateBr(EndBB);
10038     MapperCGF.EmitBlock(AllocElseBB);
10039     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10040         LeftToFrom,
10041         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10042     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10043     // In case of to, clear OMP_MAP_FROM.
10044     MapperCGF.EmitBlock(ToBB);
10045     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10046         MemberMapType,
10047         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10048     MapperCGF.Builder.CreateBr(EndBB);
10049     MapperCGF.EmitBlock(ToElseBB);
10050     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10051         LeftToFrom,
10052         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10053     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10054     // In case of from, clear OMP_MAP_TO.
10055     MapperCGF.EmitBlock(FromBB);
10056     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10057         MemberMapType,
10058         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10059     // In case of tofrom, do nothing.
10060     MapperCGF.EmitBlock(EndBB);
10061     LastBB = EndBB;
10062     llvm::PHINode *CurMapType =
10063         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10064     CurMapType->addIncoming(AllocMapType, AllocBB);
10065     CurMapType->addIncoming(ToMapType, ToBB);
10066     CurMapType->addIncoming(FromMapType, FromBB);
10067     CurMapType->addIncoming(MemberMapType, ToElseBB);
10068 
10069     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10070                                      CurSizeArg, CurMapType, CurNameArg};
10071     if (Info.Mappers[I]) {
10072       // Call the corresponding mapper function.
10073       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10074           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10075       assert(MapperFunc && "Expect a valid mapper function is available.");
10076       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10077     } else {
10078       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10079       // data structure.
10080       MapperCGF.EmitRuntimeCall(
10081           OMPBuilder.getOrCreateRuntimeFunction(
10082               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10083           OffloadingArgs);
10084     }
10085   }
10086 
10087   // Update the pointer to point to the next element that needs to be mapped,
10088   // and check whether we have mapped all elements.
10089   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10090       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10091   PtrPHI->addIncoming(PtrNext, LastBB);
10092   llvm::Value *IsDone =
10093       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10094   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10095   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10096 
10097   MapperCGF.EmitBlock(ExitBB);
10098   // Emit array deletion if this is an array section and \p MapType indicates
10099   // that deletion is required.
10100   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10101                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10102 
10103   // Emit the function exit block.
10104   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10105   MapperCGF.FinishFunction();
10106   UDMMap.try_emplace(D, Fn);
10107   if (CGF) {
10108     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10109     Decls.second.push_back(D);
10110   }
10111 }
10112 
10113 /// Emit the array initialization or deletion portion for user-defined mapper
10114 /// code generation. First, it evaluates whether an array section is mapped and
10115 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10116 /// true, and \a MapType indicates to not delete this array, array
10117 /// initialization code is generated. If \a IsInit is false, and \a MapType
10118 /// indicates to not this array, array deletion code is generated.
10119 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10120     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10121     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10122     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10123     bool IsInit) {
10124   StringRef Prefix = IsInit ? ".init" : ".del";
10125 
10126   // Evaluate if this is an array section.
10127   llvm::BasicBlock *BodyBB =
10128       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10129   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10130       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10131   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10132       MapType,
10133       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10134   llvm::Value *DeleteCond;
10135   llvm::Value *Cond;
10136   if (IsInit) {
10137     // base != begin?
10138     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10139     // IsPtrAndObj?
10140     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10141         MapType,
10142         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10143     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10144     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10145     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10146     DeleteCond = MapperCGF.Builder.CreateIsNull(
10147         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10148   } else {
10149     Cond = IsArray;
10150     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10151         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10152   }
10153   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10154   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10155 
10156   MapperCGF.EmitBlock(BodyBB);
10157   // Get the array size by multiplying element size and element number (i.e., \p
10158   // Size).
10159   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10160       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10161   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10162   // memory allocation/deletion purpose only.
10163   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10164       MapType,
10165       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10166                                    MappableExprsHandler::OMP_MAP_FROM)));
10167   MapTypeArg = MapperCGF.Builder.CreateOr(
10168       MapTypeArg,
10169       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10170 
10171   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10172   // data structure.
10173   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10174                                    ArraySize, MapTypeArg, MapName};
10175   MapperCGF.EmitRuntimeCall(
10176       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10177                                             OMPRTL___tgt_push_mapper_component),
10178       OffloadingArgs);
10179 }
10180 
10181 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10182     const OMPDeclareMapperDecl *D) {
10183   auto I = UDMMap.find(D);
10184   if (I != UDMMap.end())
10185     return I->second;
10186   emitUserDefinedMapper(D);
10187   return UDMMap.lookup(D);
10188 }
10189 
10190 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10191     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10192     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10193                                      const OMPLoopDirective &D)>
10194         SizeEmitter) {
10195   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10196   const OMPExecutableDirective *TD = &D;
10197   // Get nested teams distribute kind directive, if any.
10198   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10199     TD = getNestedDistributeDirective(CGM.getContext(), D);
10200   if (!TD)
10201     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10202 
10203   const auto *LD = cast<OMPLoopDirective>(TD);
10204   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10205     return NumIterations;
10206   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10207 }
10208 
10209 void CGOpenMPRuntime::emitTargetCall(
10210     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10211     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10212     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10213     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10214                                      const OMPLoopDirective &D)>
10215         SizeEmitter) {
10216   if (!CGF.HaveInsertPoint())
10217     return;
10218 
10219   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10220                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10221 
10222   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10223 
10224   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10225                                  D.hasClausesOfKind<OMPNowaitClause>() ||
10226                                  D.hasClausesOfKind<OMPInReductionClause>();
10227   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10228   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10229   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10230                                             PrePostActionTy &) {
10231     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10232   };
10233   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10234 
10235   CodeGenFunction::OMPTargetDataInfo InputInfo;
10236   llvm::Value *MapTypesArray = nullptr;
10237   llvm::Value *MapNamesArray = nullptr;
10238   // Generate code for the host fallback function.
10239   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10240                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10241     if (OffloadingMandatory) {
10242       CGF.Builder.CreateUnreachable();
10243     } else {
10244       if (RequiresOuterTask) {
10245         CapturedVars.clear();
10246         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10247       }
10248       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10249     }
10250   };
10251   // Fill up the pointer arrays and transfer execution to the device.
10252   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10253                     &MapNamesArray, SizeEmitter,
10254                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10255     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10256       // Reverse offloading is not supported, so just execute on the host.
10257       FallbackGen(CGF);
10258       return;
10259     }
10260 
10261     // On top of the arrays that were filled up, the target offloading call
10262     // takes as arguments the device id as well as the host pointer. The host
10263     // pointer is used by the runtime library to identify the current target
10264     // region, so it only has to be unique and not necessarily point to
10265     // anything. It could be the pointer to the outlined function that
10266     // implements the target region, but we aren't using that so that the
10267     // compiler doesn't need to keep that, and could therefore inline the host
10268     // function if proven worthwhile during optimization.
10269 
10270     // From this point on, we need to have an ID of the target region defined.
10271     assert(OutlinedFnID && "Invalid outlined function ID!");
10272     (void)OutlinedFnID;
10273 
10274     // Emit device ID if any.
10275     llvm::Value *DeviceID;
10276     if (Device.getPointer()) {
10277       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10278               Device.getInt() == OMPC_DEVICE_device_num) &&
10279              "Expected device_num modifier.");
10280       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10281       DeviceID =
10282           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10283     } else {
10284       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10285     }
10286 
10287     // Emit the number of elements in the offloading arrays.
10288     llvm::Value *PointerNum =
10289         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10290 
10291     // Return value of the runtime offloading call.
10292     llvm::Value *Return;
10293 
10294     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10295     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10296 
10297     // Source location for the ident struct
10298     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10299 
10300     // Get tripcount for the target loop-based directive.
10301     llvm::Value *NumIterations =
10302         emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10303 
10304     // Arguments for the target kernel.
10305     SmallVector<llvm::Value *> KernelArgs{
10306         CGF.Builder.getInt32(/* Version */ 1),
10307         PointerNum,
10308         InputInfo.BasePointersArray.getPointer(),
10309         InputInfo.PointersArray.getPointer(),
10310         InputInfo.SizesArray.getPointer(),
10311         MapTypesArray,
10312         MapNamesArray,
10313         InputInfo.MappersArray.getPointer(),
10314         NumIterations};
10315 
10316     // Arguments passed to the 'nowait' variant.
10317     SmallVector<llvm::Value *> NoWaitKernelArgs{
10318         CGF.Builder.getInt32(0),
10319         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
10320         CGF.Builder.getInt32(0),
10321         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
10322     };
10323 
10324     bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10325 
10326     // The target region is an outlined function launched by the runtime
10327     // via calls to __tgt_target_kernel().
10328     //
10329     // Note that on the host and CPU targets, the runtime implementation of
10330     // these calls simply call the outlined function without forking threads.
10331     // The outlined functions themselves have runtime calls to
10332     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10333     // the compiler in emitTeamsCall() and emitParallelCall().
10334     //
10335     // In contrast, on the NVPTX target, the implementation of
10336     // __tgt_target_teams() launches a GPU kernel with the requested number
10337     // of teams and threads so no additional calls to the runtime are required.
10338     // Check the error code and execute the host version if required.
10339     CGF.Builder.restoreIP(
10340         HasNoWait ? OMPBuilder.emitTargetKernel(
10341                         CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
10342                         NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
10343                   : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
10344                                                 DeviceID, NumTeams, NumThreads,
10345                                                 OutlinedFnID, KernelArgs));
10346 
10347     llvm::BasicBlock *OffloadFailedBlock =
10348         CGF.createBasicBlock("omp_offload.failed");
10349     llvm::BasicBlock *OffloadContBlock =
10350         CGF.createBasicBlock("omp_offload.cont");
10351     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10352     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10353 
10354     CGF.EmitBlock(OffloadFailedBlock);
10355     FallbackGen(CGF);
10356 
10357     CGF.EmitBranch(OffloadContBlock);
10358 
10359     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10360   };
10361 
10362   // Notify that the host version must be executed.
10363   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10364     FallbackGen(CGF);
10365   };
10366 
10367   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10368                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10369                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10370     // Fill up the arrays with all the captured variables.
10371     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10372 
10373     // Get mappable expression information.
10374     MappableExprsHandler MEHandler(D, CGF);
10375     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10376     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10377 
10378     auto RI = CS.getCapturedRecordDecl()->field_begin();
10379     auto *CV = CapturedVars.begin();
10380     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10381                                               CE = CS.capture_end();
10382          CI != CE; ++CI, ++RI, ++CV) {
10383       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10384       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10385 
10386       // VLA sizes are passed to the outlined region by copy and do not have map
10387       // information associated.
10388       if (CI->capturesVariableArrayType()) {
10389         CurInfo.Exprs.push_back(nullptr);
10390         CurInfo.BasePointers.push_back(*CV);
10391         CurInfo.Pointers.push_back(*CV);
10392         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10393             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10394         // Copy to the device as an argument. No need to retrieve it.
10395         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10396                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10397                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10398         CurInfo.Mappers.push_back(nullptr);
10399       } else {
10400         // If we have any information in the map clause, we use it, otherwise we
10401         // just do a default mapping.
10402         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10403         if (!CI->capturesThis())
10404           MappedVarSet.insert(CI->getCapturedVar());
10405         else
10406           MappedVarSet.insert(nullptr);
10407         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10408           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10409         // Generate correct mapping for variables captured by reference in
10410         // lambdas.
10411         if (CI->capturesVariable())
10412           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10413                                                   CurInfo, LambdaPointers);
10414       }
10415       // We expect to have at least an element of information for this capture.
10416       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10417              "Non-existing map pointer for capture!");
10418       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10419              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10420              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10421              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10422              "Inconsistent map information sizes!");
10423 
10424       // If there is an entry in PartialStruct it means we have a struct with
10425       // individual members mapped. Emit an extra combined entry.
10426       if (PartialStruct.Base.isValid()) {
10427         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10428         MEHandler.emitCombinedEntry(
10429             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10430             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10431       }
10432 
10433       // We need to append the results of this capture to what we already have.
10434       CombinedInfo.append(CurInfo);
10435     }
10436     // Adjust MEMBER_OF flags for the lambdas captures.
10437     MEHandler.adjustMemberOfForLambdaCaptures(
10438         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10439         CombinedInfo.Types);
10440     // Map any list items in a map clause that were not captures because they
10441     // weren't referenced within the construct.
10442     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10443 
10444     TargetDataInfo Info;
10445     // Fill up the arrays and create the arguments.
10446     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10447     emitOffloadingArraysArgument(
10448         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10449         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10450         {/*ForEndCall=*/false});
10451 
10452     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10453     InputInfo.BasePointersArray =
10454         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10455     InputInfo.PointersArray =
10456         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10457     InputInfo.SizesArray =
10458         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10459     InputInfo.MappersArray =
10460         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10461     MapTypesArray = Info.MapTypesArray;
10462     MapNamesArray = Info.MapNamesArray;
10463     if (RequiresOuterTask)
10464       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10465     else
10466       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10467   };
10468 
10469   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10470                              CodeGenFunction &CGF, PrePostActionTy &) {
10471     if (RequiresOuterTask) {
10472       CodeGenFunction::OMPTargetDataInfo InputInfo;
10473       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10474     } else {
10475       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10476     }
10477   };
10478 
10479   // If we have a target function ID it means that we need to support
10480   // offloading, otherwise, just execute on the host. We need to execute on host
10481   // regardless of the conditional in the if clause if, e.g., the user do not
10482   // specify target triples.
10483   if (OutlinedFnID) {
10484     if (IfCond) {
10485       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10486     } else {
10487       RegionCodeGenTy ThenRCG(TargetThenGen);
10488       ThenRCG(CGF);
10489     }
10490   } else {
10491     RegionCodeGenTy ElseRCG(TargetElseGen);
10492     ElseRCG(CGF);
10493   }
10494 }
10495 
10496 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10497                                                     StringRef ParentName) {
10498   if (!S)
10499     return;
10500 
10501   // Codegen OMP target directives that offload compute to the device.
10502   bool RequiresDeviceCodegen =
10503       isa<OMPExecutableDirective>(S) &&
10504       isOpenMPTargetExecutionDirective(
10505           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10506 
10507   if (RequiresDeviceCodegen) {
10508     const auto &E = *cast<OMPExecutableDirective>(S);
10509     unsigned DeviceID;
10510     unsigned FileID;
10511     unsigned Line;
10512     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10513                              FileID, Line);
10514 
10515     // Is this a target region that should not be emitted as an entry point? If
10516     // so just signal we are done with this target region.
10517     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10518                                                             ParentName, Line))
10519       return;
10520 
10521     switch (E.getDirectiveKind()) {
10522     case OMPD_target:
10523       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10524                                                    cast<OMPTargetDirective>(E));
10525       break;
10526     case OMPD_target_parallel:
10527       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10528           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10529       break;
10530     case OMPD_target_teams:
10531       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10532           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10533       break;
10534     case OMPD_target_teams_distribute:
10535       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10536           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10537       break;
10538     case OMPD_target_teams_distribute_simd:
10539       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10540           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10541       break;
10542     case OMPD_target_parallel_for:
10543       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10544           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10545       break;
10546     case OMPD_target_parallel_for_simd:
10547       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10548           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10549       break;
10550     case OMPD_target_simd:
10551       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10552           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10553       break;
10554     case OMPD_target_teams_distribute_parallel_for:
10555       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10556           CGM, ParentName,
10557           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10558       break;
10559     case OMPD_target_teams_distribute_parallel_for_simd:
10560       CodeGenFunction::
10561           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10562               CGM, ParentName,
10563               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10564       break;
10565     case OMPD_parallel:
10566     case OMPD_for:
10567     case OMPD_parallel_for:
10568     case OMPD_parallel_master:
10569     case OMPD_parallel_sections:
10570     case OMPD_for_simd:
10571     case OMPD_parallel_for_simd:
10572     case OMPD_cancel:
10573     case OMPD_cancellation_point:
10574     case OMPD_ordered:
10575     case OMPD_threadprivate:
10576     case OMPD_allocate:
10577     case OMPD_task:
10578     case OMPD_simd:
10579     case OMPD_tile:
10580     case OMPD_unroll:
10581     case OMPD_sections:
10582     case OMPD_section:
10583     case OMPD_single:
10584     case OMPD_master:
10585     case OMPD_critical:
10586     case OMPD_taskyield:
10587     case OMPD_barrier:
10588     case OMPD_taskwait:
10589     case OMPD_taskgroup:
10590     case OMPD_atomic:
10591     case OMPD_flush:
10592     case OMPD_depobj:
10593     case OMPD_scan:
10594     case OMPD_teams:
10595     case OMPD_target_data:
10596     case OMPD_target_exit_data:
10597     case OMPD_target_enter_data:
10598     case OMPD_distribute:
10599     case OMPD_distribute_simd:
10600     case OMPD_distribute_parallel_for:
10601     case OMPD_distribute_parallel_for_simd:
10602     case OMPD_teams_distribute:
10603     case OMPD_teams_distribute_simd:
10604     case OMPD_teams_distribute_parallel_for:
10605     case OMPD_teams_distribute_parallel_for_simd:
10606     case OMPD_target_update:
10607     case OMPD_declare_simd:
10608     case OMPD_declare_variant:
10609     case OMPD_begin_declare_variant:
10610     case OMPD_end_declare_variant:
10611     case OMPD_declare_target:
10612     case OMPD_end_declare_target:
10613     case OMPD_declare_reduction:
10614     case OMPD_declare_mapper:
10615     case OMPD_taskloop:
10616     case OMPD_taskloop_simd:
10617     case OMPD_master_taskloop:
10618     case OMPD_master_taskloop_simd:
10619     case OMPD_parallel_master_taskloop:
10620     case OMPD_parallel_master_taskloop_simd:
10621     case OMPD_requires:
10622     case OMPD_metadirective:
10623     case OMPD_unknown:
10624     default:
10625       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10626     }
10627     return;
10628   }
10629 
10630   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10631     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10632       return;
10633 
10634     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10635     return;
10636   }
10637 
10638   // If this is a lambda function, look into its body.
10639   if (const auto *L = dyn_cast<LambdaExpr>(S))
10640     S = L->getBody();
10641 
10642   // Keep looking for target regions recursively.
10643   for (const Stmt *II : S->children())
10644     scanForTargetRegionsFunctions(II, ParentName);
10645 }
10646 
10647 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10648   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10649       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10650   if (!DevTy)
10651     return false;
10652   // Do not emit device_type(nohost) functions for the host.
10653   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10654     return true;
10655   // Do not emit device_type(host) functions for the device.
10656   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10657     return true;
10658   return false;
10659 }
10660 
10661 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10662   // If emitting code for the host, we do not process FD here. Instead we do
10663   // the normal code generation.
10664   if (!CGM.getLangOpts().OpenMPIsDevice) {
10665     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10666       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10667                                   CGM.getLangOpts().OpenMPIsDevice))
10668         return true;
10669     return false;
10670   }
10671 
10672   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10673   // Try to detect target regions in the function.
10674   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10675     StringRef Name = CGM.getMangledName(GD);
10676     scanForTargetRegionsFunctions(FD->getBody(), Name);
10677     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10678                                 CGM.getLangOpts().OpenMPIsDevice))
10679       return true;
10680   }
10681 
10682   // Do not to emit function if it is not marked as declare target.
10683   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10684          AlreadyEmittedTargetDecls.count(VD) == 0;
10685 }
10686 
10687 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10688   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10689                               CGM.getLangOpts().OpenMPIsDevice))
10690     return true;
10691 
10692   if (!CGM.getLangOpts().OpenMPIsDevice)
10693     return false;
10694 
10695   // Check if there are Ctors/Dtors in this declaration and look for target
10696   // regions in it. We use the complete variant to produce the kernel name
10697   // mangling.
10698   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10699   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10700     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10701       StringRef ParentName =
10702           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10703       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10704     }
10705     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10706       StringRef ParentName =
10707           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10708       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10709     }
10710   }
10711 
10712   // Do not to emit variable if it is not marked as declare target.
10713   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10714       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10715           cast<VarDecl>(GD.getDecl()));
10716   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10717       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10718        HasRequiresUnifiedSharedMemory)) {
10719     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10720     return true;
10721   }
10722   return false;
10723 }
10724 
10725 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10726                                                    llvm::Constant *Addr) {
10727   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10728       !CGM.getLangOpts().OpenMPIsDevice)
10729     return;
10730 
10731   // If we have host/nohost variables, they do not need to be registered.
10732   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10733       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10734   if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
10735     return;
10736 
10737   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10738       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10739   if (!Res) {
10740     if (CGM.getLangOpts().OpenMPIsDevice) {
10741       // Register non-target variables being emitted in device code (debug info
10742       // may cause this).
10743       StringRef VarName = CGM.getMangledName(VD);
10744       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10745     }
10746     return;
10747   }
10748   // Register declare target variables.
10749   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10750   StringRef VarName;
10751   CharUnits VarSize;
10752   llvm::GlobalValue::LinkageTypes Linkage;
10753 
10754   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10755       !HasRequiresUnifiedSharedMemory) {
10756     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10757     VarName = CGM.getMangledName(VD);
10758     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10759       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10760       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10761     } else {
10762       VarSize = CharUnits::Zero();
10763     }
10764     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10765     // Temp solution to prevent optimizations of the internal variables.
10766     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10767       // Do not create a "ref-variable" if the original is not also available
10768       // on the host.
10769       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10770         return;
10771       std::string RefName = getName({VarName, "ref"});
10772       if (!CGM.GetGlobalValue(RefName)) {
10773         llvm::Constant *AddrRef =
10774             getOrCreateInternalVariable(Addr->getType(), RefName);
10775         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10776         GVAddrRef->setConstant(/*Val=*/true);
10777         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10778         GVAddrRef->setInitializer(Addr);
10779         CGM.addCompilerUsedGlobal(GVAddrRef);
10780       }
10781     }
10782   } else {
10783     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10784             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10785              HasRequiresUnifiedSharedMemory)) &&
10786            "Declare target attribute must link or to with unified memory.");
10787     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10788       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10789     else
10790       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10791 
10792     if (CGM.getLangOpts().OpenMPIsDevice) {
10793       VarName = Addr->getName();
10794       Addr = nullptr;
10795     } else {
10796       VarName = getAddrOfDeclareTargetVar(VD).getName();
10797       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10798     }
10799     VarSize = CGM.getPointerSize();
10800     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10801   }
10802 
10803   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10804       VarName, Addr, VarSize, Flags, Linkage);
10805 }
10806 
10807 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10808   if (isa<FunctionDecl>(GD.getDecl()) ||
10809       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10810     return emitTargetFunctions(GD);
10811 
10812   return emitTargetGlobalVariable(GD);
10813 }
10814 
10815 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10816   for (const VarDecl *VD : DeferredGlobalVariables) {
10817     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10818         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10819     if (!Res)
10820       continue;
10821     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10822         !HasRequiresUnifiedSharedMemory) {
10823       CGM.EmitGlobal(VD);
10824     } else {
10825       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10826               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10827                HasRequiresUnifiedSharedMemory)) &&
10828              "Expected link clause or to clause with unified memory.");
10829       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10830     }
10831   }
10832 }
10833 
10834 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10835     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10836   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10837          " Expected target-based directive.");
10838 }
10839 
10840 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10841   for (const OMPClause *Clause : D->clauselists()) {
10842     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10843       HasRequiresUnifiedSharedMemory = true;
10844     } else if (const auto *AC =
10845                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10846       switch (AC->getAtomicDefaultMemOrderKind()) {
10847       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10848         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10849         break;
10850       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10851         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10852         break;
10853       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10854         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10855         break;
10856       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10857         break;
10858       }
10859     }
10860   }
10861 }
10862 
10863 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10864   return RequiresAtomicOrdering;
10865 }
10866 
10867 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10868                                                        LangAS &AS) {
10869   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10870     return false;
10871   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10872   switch(A->getAllocatorType()) {
10873   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10874   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10875   // Not supported, fallback to the default mem space.
10876   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10877   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10878   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10879   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10880   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10881   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10882   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10883     AS = LangAS::Default;
10884     return true;
10885   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10886     llvm_unreachable("Expected predefined allocator for the variables with the "
10887                      "static storage.");
10888   }
10889   return false;
10890 }
10891 
10892 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10893   return HasRequiresUnifiedSharedMemory;
10894 }
10895 
10896 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10897     CodeGenModule &CGM)
10898     : CGM(CGM) {
10899   if (CGM.getLangOpts().OpenMPIsDevice) {
10900     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10901     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10902   }
10903 }
10904 
10905 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10906   if (CGM.getLangOpts().OpenMPIsDevice)
10907     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10908 }
10909 
10910 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10911   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10912     return true;
10913 
10914   const auto *D = cast<FunctionDecl>(GD.getDecl());
10915   // Do not to emit function if it is marked as declare target as it was already
10916   // emitted.
10917   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10918     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10919       if (auto *F = dyn_cast_or_null<llvm::Function>(
10920               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10921         return !F->isDeclaration();
10922       return false;
10923     }
10924     return true;
10925   }
10926 
10927   return !AlreadyEmittedTargetDecls.insert(D).second;
10928 }
10929 
10930 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10931   // If we don't have entries or if we are emitting code for the device, we
10932   // don't need to do anything.
10933   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10934       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10935       (OffloadEntriesInfoManager.empty() &&
10936        !HasEmittedDeclareTargetRegion &&
10937        !HasEmittedTargetRegion))
10938     return nullptr;
10939 
10940   // Create and register the function that handles the requires directives.
10941   ASTContext &C = CGM.getContext();
10942 
10943   llvm::Function *RequiresRegFn;
10944   {
10945     CodeGenFunction CGF(CGM);
10946     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10947     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10948     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10949     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10950     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10951     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10952     // TODO: check for other requires clauses.
10953     // The requires directive takes effect only when a target region is
10954     // present in the compilation unit. Otherwise it is ignored and not
10955     // passed to the runtime. This avoids the runtime from throwing an error
10956     // for mismatching requires clauses across compilation units that don't
10957     // contain at least 1 target region.
10958     assert((HasEmittedTargetRegion ||
10959             HasEmittedDeclareTargetRegion ||
10960             !OffloadEntriesInfoManager.empty()) &&
10961            "Target or declare target region expected.");
10962     if (HasRequiresUnifiedSharedMemory)
10963       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10964     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10965                             CGM.getModule(), OMPRTL___tgt_register_requires),
10966                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10967     CGF.FinishFunction();
10968   }
10969   return RequiresRegFn;
10970 }
10971 
10972 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10973                                     const OMPExecutableDirective &D,
10974                                     SourceLocation Loc,
10975                                     llvm::Function *OutlinedFn,
10976                                     ArrayRef<llvm::Value *> CapturedVars) {
10977   if (!CGF.HaveInsertPoint())
10978     return;
10979 
10980   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10981   CodeGenFunction::RunCleanupsScope Scope(CGF);
10982 
10983   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10984   llvm::Value *Args[] = {
10985       RTLoc,
10986       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10987       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10988   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10989   RealArgs.append(std::begin(Args), std::end(Args));
10990   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10991 
10992   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10993       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10994   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10995 }
10996 
10997 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10998                                          const Expr *NumTeams,
10999                                          const Expr *ThreadLimit,
11000                                          SourceLocation Loc) {
11001   if (!CGF.HaveInsertPoint())
11002     return;
11003 
11004   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11005 
11006   llvm::Value *NumTeamsVal =
11007       NumTeams
11008           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11009                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11010           : CGF.Builder.getInt32(0);
11011 
11012   llvm::Value *ThreadLimitVal =
11013       ThreadLimit
11014           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11015                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11016           : CGF.Builder.getInt32(0);
11017 
11018   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11019   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11020                                      ThreadLimitVal};
11021   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11022                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11023                       PushNumTeamsArgs);
11024 }
11025 
11026 void CGOpenMPRuntime::emitTargetDataCalls(
11027     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11028     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11029   if (!CGF.HaveInsertPoint())
11030     return;
11031 
11032   // Action used to replace the default codegen action and turn privatization
11033   // off.
11034   PrePostActionTy NoPrivAction;
11035 
11036   // Generate the code for the opening of the data environment. Capture all the
11037   // arguments of the runtime call by reference because they are used in the
11038   // closing of the region.
11039   auto &&BeginThenGen = [this, &D, Device, &Info,
11040                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11041     // Fill up the arrays with all the mapped variables.
11042     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11043 
11044     // Get map clause information.
11045     MappableExprsHandler MEHandler(D, CGF);
11046     MEHandler.generateAllInfo(CombinedInfo);
11047 
11048     // Fill up the arrays and create the arguments.
11049     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11050                          /*IsNonContiguous=*/true);
11051 
11052     llvm::Value *BasePointersArrayArg = nullptr;
11053     llvm::Value *PointersArrayArg = nullptr;
11054     llvm::Value *SizesArrayArg = nullptr;
11055     llvm::Value *MapTypesArrayArg = nullptr;
11056     llvm::Value *MapNamesArrayArg = nullptr;
11057     llvm::Value *MappersArrayArg = nullptr;
11058     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11059                                  SizesArrayArg, MapTypesArrayArg,
11060                                  MapNamesArrayArg, MappersArrayArg, Info);
11061 
11062     // Emit device ID if any.
11063     llvm::Value *DeviceID = nullptr;
11064     if (Device) {
11065       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11066                                            CGF.Int64Ty, /*isSigned=*/true);
11067     } else {
11068       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11069     }
11070 
11071     // Emit the number of elements in the offloading arrays.
11072     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11073     //
11074     // Source location for the ident struct
11075     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11076 
11077     llvm::Value *OffloadingArgs[] = {RTLoc,
11078                                      DeviceID,
11079                                      PointerNum,
11080                                      BasePointersArrayArg,
11081                                      PointersArrayArg,
11082                                      SizesArrayArg,
11083                                      MapTypesArrayArg,
11084                                      MapNamesArrayArg,
11085                                      MappersArrayArg};
11086     CGF.EmitRuntimeCall(
11087         OMPBuilder.getOrCreateRuntimeFunction(
11088             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11089         OffloadingArgs);
11090 
11091     // If device pointer privatization is required, emit the body of the region
11092     // here. It will have to be duplicated: with and without privatization.
11093     if (!Info.CaptureDeviceAddrMap.empty())
11094       CodeGen(CGF);
11095   };
11096 
11097   // Generate code for the closing of the data region.
11098   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11099                                                 PrePostActionTy &) {
11100     assert(Info.isValid() && "Invalid data environment closing arguments.");
11101 
11102     llvm::Value *BasePointersArrayArg = nullptr;
11103     llvm::Value *PointersArrayArg = nullptr;
11104     llvm::Value *SizesArrayArg = nullptr;
11105     llvm::Value *MapTypesArrayArg = nullptr;
11106     llvm::Value *MapNamesArrayArg = nullptr;
11107     llvm::Value *MappersArrayArg = nullptr;
11108     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11109                                  SizesArrayArg, MapTypesArrayArg,
11110                                  MapNamesArrayArg, MappersArrayArg, Info,
11111                                  {/*ForEndCall=*/true});
11112 
11113     // Emit device ID if any.
11114     llvm::Value *DeviceID = nullptr;
11115     if (Device) {
11116       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11117                                            CGF.Int64Ty, /*isSigned=*/true);
11118     } else {
11119       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11120     }
11121 
11122     // Emit the number of elements in the offloading arrays.
11123     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11124 
11125     // Source location for the ident struct
11126     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11127 
11128     llvm::Value *OffloadingArgs[] = {RTLoc,
11129                                      DeviceID,
11130                                      PointerNum,
11131                                      BasePointersArrayArg,
11132                                      PointersArrayArg,
11133                                      SizesArrayArg,
11134                                      MapTypesArrayArg,
11135                                      MapNamesArrayArg,
11136                                      MappersArrayArg};
11137     CGF.EmitRuntimeCall(
11138         OMPBuilder.getOrCreateRuntimeFunction(
11139             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11140         OffloadingArgs);
11141   };
11142 
11143   // If we need device pointer privatization, we need to emit the body of the
11144   // region with no privatization in the 'else' branch of the conditional.
11145   // Otherwise, we don't have to do anything.
11146   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11147                                                          PrePostActionTy &) {
11148     if (!Info.CaptureDeviceAddrMap.empty()) {
11149       CodeGen.setAction(NoPrivAction);
11150       CodeGen(CGF);
11151     }
11152   };
11153 
11154   // We don't have to do anything to close the region if the if clause evaluates
11155   // to false.
11156   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11157 
11158   if (IfCond) {
11159     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11160   } else {
11161     RegionCodeGenTy RCG(BeginThenGen);
11162     RCG(CGF);
11163   }
11164 
11165   // If we don't require privatization of device pointers, we emit the body in
11166   // between the runtime calls. This avoids duplicating the body code.
11167   if (Info.CaptureDeviceAddrMap.empty()) {
11168     CodeGen.setAction(NoPrivAction);
11169     CodeGen(CGF);
11170   }
11171 
11172   if (IfCond) {
11173     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11174   } else {
11175     RegionCodeGenTy RCG(EndThenGen);
11176     RCG(CGF);
11177   }
11178 }
11179 
11180 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11181     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11182     const Expr *Device) {
11183   if (!CGF.HaveInsertPoint())
11184     return;
11185 
11186   assert((isa<OMPTargetEnterDataDirective>(D) ||
11187           isa<OMPTargetExitDataDirective>(D) ||
11188           isa<OMPTargetUpdateDirective>(D)) &&
11189          "Expecting either target enter, exit data, or update directives.");
11190 
11191   CodeGenFunction::OMPTargetDataInfo InputInfo;
11192   llvm::Value *MapTypesArray = nullptr;
11193   llvm::Value *MapNamesArray = nullptr;
11194   // Generate the code for the opening of the data environment.
11195   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11196                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11197     // Emit device ID if any.
11198     llvm::Value *DeviceID = nullptr;
11199     if (Device) {
11200       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11201                                            CGF.Int64Ty, /*isSigned=*/true);
11202     } else {
11203       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11204     }
11205 
11206     // Emit the number of elements in the offloading arrays.
11207     llvm::Constant *PointerNum =
11208         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11209 
11210     // Source location for the ident struct
11211     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11212 
11213     llvm::Value *OffloadingArgs[] = {RTLoc,
11214                                      DeviceID,
11215                                      PointerNum,
11216                                      InputInfo.BasePointersArray.getPointer(),
11217                                      InputInfo.PointersArray.getPointer(),
11218                                      InputInfo.SizesArray.getPointer(),
11219                                      MapTypesArray,
11220                                      MapNamesArray,
11221                                      InputInfo.MappersArray.getPointer()};
11222 
11223     // Select the right runtime function call for each standalone
11224     // directive.
11225     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11226     RuntimeFunction RTLFn;
11227     switch (D.getDirectiveKind()) {
11228     case OMPD_target_enter_data:
11229       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11230                         : OMPRTL___tgt_target_data_begin_mapper;
11231       break;
11232     case OMPD_target_exit_data:
11233       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11234                         : OMPRTL___tgt_target_data_end_mapper;
11235       break;
11236     case OMPD_target_update:
11237       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11238                         : OMPRTL___tgt_target_data_update_mapper;
11239       break;
11240     case OMPD_parallel:
11241     case OMPD_for:
11242     case OMPD_parallel_for:
11243     case OMPD_parallel_master:
11244     case OMPD_parallel_sections:
11245     case OMPD_for_simd:
11246     case OMPD_parallel_for_simd:
11247     case OMPD_cancel:
11248     case OMPD_cancellation_point:
11249     case OMPD_ordered:
11250     case OMPD_threadprivate:
11251     case OMPD_allocate:
11252     case OMPD_task:
11253     case OMPD_simd:
11254     case OMPD_tile:
11255     case OMPD_unroll:
11256     case OMPD_sections:
11257     case OMPD_section:
11258     case OMPD_single:
11259     case OMPD_master:
11260     case OMPD_critical:
11261     case OMPD_taskyield:
11262     case OMPD_barrier:
11263     case OMPD_taskwait:
11264     case OMPD_taskgroup:
11265     case OMPD_atomic:
11266     case OMPD_flush:
11267     case OMPD_depobj:
11268     case OMPD_scan:
11269     case OMPD_teams:
11270     case OMPD_target_data:
11271     case OMPD_distribute:
11272     case OMPD_distribute_simd:
11273     case OMPD_distribute_parallel_for:
11274     case OMPD_distribute_parallel_for_simd:
11275     case OMPD_teams_distribute:
11276     case OMPD_teams_distribute_simd:
11277     case OMPD_teams_distribute_parallel_for:
11278     case OMPD_teams_distribute_parallel_for_simd:
11279     case OMPD_declare_simd:
11280     case OMPD_declare_variant:
11281     case OMPD_begin_declare_variant:
11282     case OMPD_end_declare_variant:
11283     case OMPD_declare_target:
11284     case OMPD_end_declare_target:
11285     case OMPD_declare_reduction:
11286     case OMPD_declare_mapper:
11287     case OMPD_taskloop:
11288     case OMPD_taskloop_simd:
11289     case OMPD_master_taskloop:
11290     case OMPD_master_taskloop_simd:
11291     case OMPD_parallel_master_taskloop:
11292     case OMPD_parallel_master_taskloop_simd:
11293     case OMPD_target:
11294     case OMPD_target_simd:
11295     case OMPD_target_teams_distribute:
11296     case OMPD_target_teams_distribute_simd:
11297     case OMPD_target_teams_distribute_parallel_for:
11298     case OMPD_target_teams_distribute_parallel_for_simd:
11299     case OMPD_target_teams:
11300     case OMPD_target_parallel:
11301     case OMPD_target_parallel_for:
11302     case OMPD_target_parallel_for_simd:
11303     case OMPD_requires:
11304     case OMPD_metadirective:
11305     case OMPD_unknown:
11306     default:
11307       llvm_unreachable("Unexpected standalone target data directive.");
11308       break;
11309     }
11310     CGF.EmitRuntimeCall(
11311         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11312         OffloadingArgs);
11313   };
11314 
11315   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11316                           &MapNamesArray](CodeGenFunction &CGF,
11317                                           PrePostActionTy &) {
11318     // Fill up the arrays with all the mapped variables.
11319     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11320 
11321     // Get map clause information.
11322     MappableExprsHandler MEHandler(D, CGF);
11323     MEHandler.generateAllInfo(CombinedInfo);
11324 
11325     TargetDataInfo Info;
11326     // Fill up the arrays and create the arguments.
11327     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11328                          /*IsNonContiguous=*/true);
11329     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11330                              D.hasClausesOfKind<OMPNowaitClause>();
11331     emitOffloadingArraysArgument(
11332         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11333         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11334         {/*ForEndCall=*/false});
11335     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11336     InputInfo.BasePointersArray =
11337         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11338     InputInfo.PointersArray =
11339         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11340     InputInfo.SizesArray =
11341         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11342     InputInfo.MappersArray =
11343         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11344     MapTypesArray = Info.MapTypesArray;
11345     MapNamesArray = Info.MapNamesArray;
11346     if (RequiresOuterTask)
11347       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11348     else
11349       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11350   };
11351 
11352   if (IfCond) {
11353     emitIfClause(CGF, IfCond, TargetThenGen,
11354                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11355   } else {
11356     RegionCodeGenTy ThenRCG(TargetThenGen);
11357     ThenRCG(CGF);
11358   }
11359 }
11360 
11361 namespace {
11362   /// Kind of parameter in a function with 'declare simd' directive.
11363 enum ParamKindTy {
11364   Linear,
11365   LinearRef,
11366   LinearUVal,
11367   LinearVal,
11368   Uniform,
11369   Vector,
11370 };
11371 /// Attribute set of the parameter.
11372 struct ParamAttrTy {
11373   ParamKindTy Kind = Vector;
11374   llvm::APSInt StrideOrArg;
11375   llvm::APSInt Alignment;
11376   bool HasVarStride = false;
11377 };
11378 } // namespace
11379 
11380 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11381                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11382   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11383   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11384   // of that clause. The VLEN value must be power of 2.
11385   // In other case the notion of the function`s "characteristic data type" (CDT)
11386   // is used to compute the vector length.
11387   // CDT is defined in the following order:
11388   //   a) For non-void function, the CDT is the return type.
11389   //   b) If the function has any non-uniform, non-linear parameters, then the
11390   //   CDT is the type of the first such parameter.
11391   //   c) If the CDT determined by a) or b) above is struct, union, or class
11392   //   type which is pass-by-value (except for the type that maps to the
11393   //   built-in complex data type), the characteristic data type is int.
11394   //   d) If none of the above three cases is applicable, the CDT is int.
11395   // The VLEN is then determined based on the CDT and the size of vector
11396   // register of that ISA for which current vector version is generated. The
11397   // VLEN is computed using the formula below:
11398   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11399   // where vector register size specified in section 3.2.1 Registers and the
11400   // Stack Frame of original AMD64 ABI document.
11401   QualType RetType = FD->getReturnType();
11402   if (RetType.isNull())
11403     return 0;
11404   ASTContext &C = FD->getASTContext();
11405   QualType CDT;
11406   if (!RetType.isNull() && !RetType->isVoidType()) {
11407     CDT = RetType;
11408   } else {
11409     unsigned Offset = 0;
11410     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11411       if (ParamAttrs[Offset].Kind == Vector)
11412         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11413       ++Offset;
11414     }
11415     if (CDT.isNull()) {
11416       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11417         if (ParamAttrs[I + Offset].Kind == Vector) {
11418           CDT = FD->getParamDecl(I)->getType();
11419           break;
11420         }
11421       }
11422     }
11423   }
11424   if (CDT.isNull())
11425     CDT = C.IntTy;
11426   CDT = CDT->getCanonicalTypeUnqualified();
11427   if (CDT->isRecordType() || CDT->isUnionType())
11428     CDT = C.IntTy;
11429   return C.getTypeSize(CDT);
11430 }
11431 
11432 /// Mangle the parameter part of the vector function name according to
11433 /// their OpenMP classification. The mangling function is defined in
11434 /// section 4.5 of the AAVFABI(2021Q1).
11435 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11436   SmallString<256> Buffer;
11437   llvm::raw_svector_ostream Out(Buffer);
11438   for (const auto &ParamAttr : ParamAttrs) {
11439     switch (ParamAttr.Kind) {
11440     case Linear:
11441       Out << 'l';
11442       break;
11443     case LinearRef:
11444       Out << 'R';
11445       break;
11446     case LinearUVal:
11447       Out << 'U';
11448       break;
11449     case LinearVal:
11450       Out << 'L';
11451       break;
11452     case Uniform:
11453       Out << 'u';
11454       break;
11455     case Vector:
11456       Out << 'v';
11457       break;
11458     }
11459     if (ParamAttr.HasVarStride)
11460       Out << "s" << ParamAttr.StrideOrArg;
11461     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11462              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11463       // Don't print the step value if it is not present or if it is
11464       // equal to 1.
11465       if (ParamAttr.StrideOrArg < 0)
11466         Out << 'n' << -ParamAttr.StrideOrArg;
11467       else if (ParamAttr.StrideOrArg != 1)
11468         Out << ParamAttr.StrideOrArg;
11469     }
11470 
11471     if (!!ParamAttr.Alignment)
11472       Out << 'a' << ParamAttr.Alignment;
11473   }
11474 
11475   return std::string(Out.str());
11476 }
11477 
11478 static void
11479 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11480                            const llvm::APSInt &VLENVal,
11481                            ArrayRef<ParamAttrTy> ParamAttrs,
11482                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11483   struct ISADataTy {
11484     char ISA;
11485     unsigned VecRegSize;
11486   };
11487   ISADataTy ISAData[] = {
11488       {
11489           'b', 128
11490       }, // SSE
11491       {
11492           'c', 256
11493       }, // AVX
11494       {
11495           'd', 256
11496       }, // AVX2
11497       {
11498           'e', 512
11499       }, // AVX512
11500   };
11501   llvm::SmallVector<char, 2> Masked;
11502   switch (State) {
11503   case OMPDeclareSimdDeclAttr::BS_Undefined:
11504     Masked.push_back('N');
11505     Masked.push_back('M');
11506     break;
11507   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11508     Masked.push_back('N');
11509     break;
11510   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11511     Masked.push_back('M');
11512     break;
11513   }
11514   for (char Mask : Masked) {
11515     for (const ISADataTy &Data : ISAData) {
11516       SmallString<256> Buffer;
11517       llvm::raw_svector_ostream Out(Buffer);
11518       Out << "_ZGV" << Data.ISA << Mask;
11519       if (!VLENVal) {
11520         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11521         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11522         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11523       } else {
11524         Out << VLENVal;
11525       }
11526       Out << mangleVectorParameters(ParamAttrs);
11527       Out << '_' << Fn->getName();
11528       Fn->addFnAttr(Out.str());
11529     }
11530   }
11531 }
11532 
11533 // This are the Functions that are needed to mangle the name of the
11534 // vector functions generated by the compiler, according to the rules
11535 // defined in the "Vector Function ABI specifications for AArch64",
11536 // available at
11537 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11538 
11539 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11540 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11541   QT = QT.getCanonicalType();
11542 
11543   if (QT->isVoidType())
11544     return false;
11545 
11546   if (Kind == ParamKindTy::Uniform)
11547     return false;
11548 
11549   if (Kind == ParamKindTy::LinearUVal || ParamKindTy::LinearRef)
11550     return false;
11551 
11552   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11553       !QT->isReferenceType())
11554     return false;
11555 
11556   return true;
11557 }
11558 
11559 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11560 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11561   QT = QT.getCanonicalType();
11562   unsigned Size = C.getTypeSize(QT);
11563 
11564   // Only scalars and complex within 16 bytes wide set PVB to true.
11565   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11566     return false;
11567 
11568   if (QT->isFloatingType())
11569     return true;
11570 
11571   if (QT->isIntegerType())
11572     return true;
11573 
11574   if (QT->isPointerType())
11575     return true;
11576 
11577   // TODO: Add support for complex types (section 3.1.2, item 2).
11578 
11579   return false;
11580 }
11581 
11582 /// Computes the lane size (LS) of a return type or of an input parameter,
11583 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11584 /// TODO: Add support for references, section 3.2.1, item 1.
11585 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11586   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11587     QualType PTy = QT.getCanonicalType()->getPointeeType();
11588     if (getAArch64PBV(PTy, C))
11589       return C.getTypeSize(PTy);
11590   }
11591   if (getAArch64PBV(QT, C))
11592     return C.getTypeSize(QT);
11593 
11594   return C.getTypeSize(C.getUIntPtrType());
11595 }
11596 
11597 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11598 // signature of the scalar function, as defined in 3.2.2 of the
11599 // AAVFABI.
11600 static std::tuple<unsigned, unsigned, bool>
11601 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11602   QualType RetType = FD->getReturnType().getCanonicalType();
11603 
11604   ASTContext &C = FD->getASTContext();
11605 
11606   bool OutputBecomesInput = false;
11607 
11608   llvm::SmallVector<unsigned, 8> Sizes;
11609   if (!RetType->isVoidType()) {
11610     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11611     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11612       OutputBecomesInput = true;
11613   }
11614   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11615     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11616     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11617   }
11618 
11619   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11620   // The LS of a function parameter / return value can only be a power
11621   // of 2, starting from 8 bits, up to 128.
11622   assert(llvm::all_of(Sizes,
11623                       [](unsigned Size) {
11624                         return Size == 8 || Size == 16 || Size == 32 ||
11625                                Size == 64 || Size == 128;
11626                       }) &&
11627          "Invalid size");
11628 
11629   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11630                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11631                          OutputBecomesInput);
11632 }
11633 
11634 // Function used to add the attribute. The parameter `VLEN` is
11635 // templated to allow the use of "x" when targeting scalable functions
11636 // for SVE.
11637 template <typename T>
11638 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11639                                  char ISA, StringRef ParSeq,
11640                                  StringRef MangledName, bool OutputBecomesInput,
11641                                  llvm::Function *Fn) {
11642   SmallString<256> Buffer;
11643   llvm::raw_svector_ostream Out(Buffer);
11644   Out << Prefix << ISA << LMask << VLEN;
11645   if (OutputBecomesInput)
11646     Out << "v";
11647   Out << ParSeq << "_" << MangledName;
11648   Fn->addFnAttr(Out.str());
11649 }
11650 
11651 // Helper function to generate the Advanced SIMD names depending on
11652 // the value of the NDS when simdlen is not present.
11653 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11654                                       StringRef Prefix, char ISA,
11655                                       StringRef ParSeq, StringRef MangledName,
11656                                       bool OutputBecomesInput,
11657                                       llvm::Function *Fn) {
11658   switch (NDS) {
11659   case 8:
11660     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11661                          OutputBecomesInput, Fn);
11662     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11663                          OutputBecomesInput, Fn);
11664     break;
11665   case 16:
11666     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11667                          OutputBecomesInput, Fn);
11668     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11669                          OutputBecomesInput, Fn);
11670     break;
11671   case 32:
11672     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11673                          OutputBecomesInput, Fn);
11674     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11675                          OutputBecomesInput, Fn);
11676     break;
11677   case 64:
11678   case 128:
11679     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11680                          OutputBecomesInput, Fn);
11681     break;
11682   default:
11683     llvm_unreachable("Scalar type is too wide.");
11684   }
11685 }
11686 
11687 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11688 static void emitAArch64DeclareSimdFunction(
11689     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11690     ArrayRef<ParamAttrTy> ParamAttrs,
11691     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11692     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11693 
11694   // Get basic data for building the vector signature.
11695   const auto Data = getNDSWDS(FD, ParamAttrs);
11696   const unsigned NDS = std::get<0>(Data);
11697   const unsigned WDS = std::get<1>(Data);
11698   const bool OutputBecomesInput = std::get<2>(Data);
11699 
11700   // Check the values provided via `simdlen` by the user.
11701   // 1. A `simdlen(1)` doesn't produce vector signatures,
11702   if (UserVLEN == 1) {
11703     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11704         DiagnosticsEngine::Warning,
11705         "The clause simdlen(1) has no effect when targeting aarch64.");
11706     CGM.getDiags().Report(SLoc, DiagID);
11707     return;
11708   }
11709 
11710   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11711   // Advanced SIMD output.
11712   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11713     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11714         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11715                                     "power of 2 when targeting Advanced SIMD.");
11716     CGM.getDiags().Report(SLoc, DiagID);
11717     return;
11718   }
11719 
11720   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11721   // limits.
11722   if (ISA == 's' && UserVLEN != 0) {
11723     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11724       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11725           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11726                                       "lanes in the architectural constraints "
11727                                       "for SVE (min is 128-bit, max is "
11728                                       "2048-bit, by steps of 128-bit)");
11729       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11730       return;
11731     }
11732   }
11733 
11734   // Sort out parameter sequence.
11735   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11736   StringRef Prefix = "_ZGV";
11737   // Generate simdlen from user input (if any).
11738   if (UserVLEN) {
11739     if (ISA == 's') {
11740       // SVE generates only a masked function.
11741       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11742                            OutputBecomesInput, Fn);
11743     } else {
11744       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11745       // Advanced SIMD generates one or two functions, depending on
11746       // the `[not]inbranch` clause.
11747       switch (State) {
11748       case OMPDeclareSimdDeclAttr::BS_Undefined:
11749         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11750                              OutputBecomesInput, Fn);
11751         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11752                              OutputBecomesInput, Fn);
11753         break;
11754       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11755         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11756                              OutputBecomesInput, Fn);
11757         break;
11758       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11759         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11760                              OutputBecomesInput, Fn);
11761         break;
11762       }
11763     }
11764   } else {
11765     // If no user simdlen is provided, follow the AAVFABI rules for
11766     // generating the vector length.
11767     if (ISA == 's') {
11768       // SVE, section 3.4.1, item 1.
11769       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11770                            OutputBecomesInput, Fn);
11771     } else {
11772       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11773       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11774       // two vector names depending on the use of the clause
11775       // `[not]inbranch`.
11776       switch (State) {
11777       case OMPDeclareSimdDeclAttr::BS_Undefined:
11778         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11779                                   OutputBecomesInput, Fn);
11780         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11781                                   OutputBecomesInput, Fn);
11782         break;
11783       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11784         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11785                                   OutputBecomesInput, Fn);
11786         break;
11787       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11788         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11789                                   OutputBecomesInput, Fn);
11790         break;
11791       }
11792     }
11793   }
11794 }
11795 
11796 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11797                                               llvm::Function *Fn) {
11798   ASTContext &C = CGM.getContext();
11799   FD = FD->getMostRecentDecl();
11800   while (FD) {
11801     // Map params to their positions in function decl.
11802     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11803     if (isa<CXXMethodDecl>(FD))
11804       ParamPositions.try_emplace(FD, 0);
11805     unsigned ParamPos = ParamPositions.size();
11806     for (const ParmVarDecl *P : FD->parameters()) {
11807       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11808       ++ParamPos;
11809     }
11810     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11811       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11812       // Mark uniform parameters.
11813       for (const Expr *E : Attr->uniforms()) {
11814         E = E->IgnoreParenImpCasts();
11815         unsigned Pos;
11816         if (isa<CXXThisExpr>(E)) {
11817           Pos = ParamPositions[FD];
11818         } else {
11819           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11820                                 ->getCanonicalDecl();
11821           auto It = ParamPositions.find(PVD);
11822           assert(It != ParamPositions.end() && "Function parameter not found");
11823           Pos = It->second;
11824         }
11825         ParamAttrs[Pos].Kind = Uniform;
11826       }
11827       // Get alignment info.
11828       auto *NI = Attr->alignments_begin();
11829       for (const Expr *E : Attr->aligneds()) {
11830         E = E->IgnoreParenImpCasts();
11831         unsigned Pos;
11832         QualType ParmTy;
11833         if (isa<CXXThisExpr>(E)) {
11834           Pos = ParamPositions[FD];
11835           ParmTy = E->getType();
11836         } else {
11837           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11838                                 ->getCanonicalDecl();
11839           auto It = ParamPositions.find(PVD);
11840           assert(It != ParamPositions.end() && "Function parameter not found");
11841           Pos = It->second;
11842           ParmTy = PVD->getType();
11843         }
11844         ParamAttrs[Pos].Alignment =
11845             (*NI)
11846                 ? (*NI)->EvaluateKnownConstInt(C)
11847                 : llvm::APSInt::getUnsigned(
11848                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11849                           .getQuantity());
11850         ++NI;
11851       }
11852       // Mark linear parameters.
11853       auto *SI = Attr->steps_begin();
11854       auto *MI = Attr->modifiers_begin();
11855       for (const Expr *E : Attr->linears()) {
11856         E = E->IgnoreParenImpCasts();
11857         unsigned Pos;
11858         bool IsReferenceType = false;
11859         // Rescaling factor needed to compute the linear parameter
11860         // value in the mangled name.
11861         unsigned PtrRescalingFactor = 1;
11862         if (isa<CXXThisExpr>(E)) {
11863           Pos = ParamPositions[FD];
11864           auto *P = cast<PointerType>(E->getType());
11865           PtrRescalingFactor = CGM.getContext()
11866                                    .getTypeSizeInChars(P->getPointeeType())
11867                                    .getQuantity();
11868         } else {
11869           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11870                                 ->getCanonicalDecl();
11871           auto It = ParamPositions.find(PVD);
11872           assert(It != ParamPositions.end() && "Function parameter not found");
11873           Pos = It->second;
11874           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11875             PtrRescalingFactor = CGM.getContext()
11876                                      .getTypeSizeInChars(P->getPointeeType())
11877                                      .getQuantity();
11878           else if (PVD->getType()->isReferenceType()) {
11879             IsReferenceType = true;
11880             PtrRescalingFactor =
11881                 CGM.getContext()
11882                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11883                     .getQuantity();
11884           }
11885         }
11886         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11887         if (*MI == OMPC_LINEAR_ref)
11888           ParamAttr.Kind = LinearRef;
11889         else if (*MI == OMPC_LINEAR_uval)
11890           ParamAttr.Kind = LinearUVal;
11891         else if (IsReferenceType)
11892           ParamAttr.Kind = LinearVal;
11893         else
11894           ParamAttr.Kind = Linear;
11895         // Assuming a stride of 1, for `linear` without modifiers.
11896         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11897         if (*SI) {
11898           Expr::EvalResult Result;
11899           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11900             if (const auto *DRE =
11901                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11902               if (const auto *StridePVD =
11903                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11904                 ParamAttr.HasVarStride = true;
11905                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11906                 assert(It != ParamPositions.end() &&
11907                        "Function parameter not found");
11908                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11909               }
11910             }
11911           } else {
11912             ParamAttr.StrideOrArg = Result.Val.getInt();
11913           }
11914         }
11915         // If we are using a linear clause on a pointer, we need to
11916         // rescale the value of linear_step with the byte size of the
11917         // pointee type.
11918         if (!ParamAttr.HasVarStride &&
11919             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11920           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11921         ++SI;
11922         ++MI;
11923       }
11924       llvm::APSInt VLENVal;
11925       SourceLocation ExprLoc;
11926       const Expr *VLENExpr = Attr->getSimdlen();
11927       if (VLENExpr) {
11928         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11929         ExprLoc = VLENExpr->getExprLoc();
11930       }
11931       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11932       if (CGM.getTriple().isX86()) {
11933         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11934       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11935         unsigned VLEN = VLENVal.getExtValue();
11936         StringRef MangledName = Fn->getName();
11937         if (CGM.getTarget().hasFeature("sve"))
11938           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11939                                          MangledName, 's', 128, Fn, ExprLoc);
11940         if (CGM.getTarget().hasFeature("neon"))
11941           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11942                                          MangledName, 'n', 128, Fn, ExprLoc);
11943       }
11944     }
11945     FD = FD->getPreviousDecl();
11946   }
11947 }
11948 
11949 namespace {
11950 /// Cleanup action for doacross support.
11951 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11952 public:
11953   static const int DoacrossFinArgs = 2;
11954 
11955 private:
11956   llvm::FunctionCallee RTLFn;
11957   llvm::Value *Args[DoacrossFinArgs];
11958 
11959 public:
11960   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11961                     ArrayRef<llvm::Value *> CallArgs)
11962       : RTLFn(RTLFn) {
11963     assert(CallArgs.size() == DoacrossFinArgs);
11964     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11965   }
11966   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11967     if (!CGF.HaveInsertPoint())
11968       return;
11969     CGF.EmitRuntimeCall(RTLFn, Args);
11970   }
11971 };
11972 } // namespace
11973 
11974 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11975                                        const OMPLoopDirective &D,
11976                                        ArrayRef<Expr *> NumIterations) {
11977   if (!CGF.HaveInsertPoint())
11978     return;
11979 
11980   ASTContext &C = CGM.getContext();
11981   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11982   RecordDecl *RD;
11983   if (KmpDimTy.isNull()) {
11984     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11985     //  kmp_int64 lo; // lower
11986     //  kmp_int64 up; // upper
11987     //  kmp_int64 st; // stride
11988     // };
11989     RD = C.buildImplicitRecord("kmp_dim");
11990     RD->startDefinition();
11991     addFieldToRecordDecl(C, RD, Int64Ty);
11992     addFieldToRecordDecl(C, RD, Int64Ty);
11993     addFieldToRecordDecl(C, RD, Int64Ty);
11994     RD->completeDefinition();
11995     KmpDimTy = C.getRecordType(RD);
11996   } else {
11997     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11998   }
11999   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12000   QualType ArrayTy =
12001       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12002 
12003   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12004   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12005   enum { LowerFD = 0, UpperFD, StrideFD };
12006   // Fill dims with data.
12007   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12008     LValue DimsLVal = CGF.MakeAddrLValue(
12009         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12010     // dims.upper = num_iterations;
12011     LValue UpperLVal = CGF.EmitLValueForField(
12012         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12013     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12014         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12015         Int64Ty, NumIterations[I]->getExprLoc());
12016     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12017     // dims.stride = 1;
12018     LValue StrideLVal = CGF.EmitLValueForField(
12019         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12020     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12021                           StrideLVal);
12022   }
12023 
12024   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12025   // kmp_int32 num_dims, struct kmp_dim * dims);
12026   llvm::Value *Args[] = {
12027       emitUpdateLocation(CGF, D.getBeginLoc()),
12028       getThreadID(CGF, D.getBeginLoc()),
12029       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12030       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12031           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12032           CGM.VoidPtrTy)};
12033 
12034   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12035       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12036   CGF.EmitRuntimeCall(RTLFn, Args);
12037   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12038       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12039   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12040       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12041   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12042                                              llvm::makeArrayRef(FiniArgs));
12043 }
12044 
12045 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12046                                           const OMPDependClause *C) {
12047   QualType Int64Ty =
12048       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12049   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12050   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12051       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12052   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12053   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12054     const Expr *CounterVal = C->getLoopData(I);
12055     assert(CounterVal);
12056     llvm::Value *CntVal = CGF.EmitScalarConversion(
12057         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12058         CounterVal->getExprLoc());
12059     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12060                           /*Volatile=*/false, Int64Ty);
12061   }
12062   llvm::Value *Args[] = {
12063       emitUpdateLocation(CGF, C->getBeginLoc()),
12064       getThreadID(CGF, C->getBeginLoc()),
12065       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12066   llvm::FunctionCallee RTLFn;
12067   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12068     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12069                                                   OMPRTL___kmpc_doacross_post);
12070   } else {
12071     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12072     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12073                                                   OMPRTL___kmpc_doacross_wait);
12074   }
12075   CGF.EmitRuntimeCall(RTLFn, Args);
12076 }
12077 
12078 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12079                                llvm::FunctionCallee Callee,
12080                                ArrayRef<llvm::Value *> Args) const {
12081   assert(Loc.isValid() && "Outlined function call location must be valid.");
12082   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12083 
12084   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12085     if (Fn->doesNotThrow()) {
12086       CGF.EmitNounwindRuntimeCall(Fn, Args);
12087       return;
12088     }
12089   }
12090   CGF.EmitRuntimeCall(Callee, Args);
12091 }
12092 
12093 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12094     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12095     ArrayRef<llvm::Value *> Args) const {
12096   emitCall(CGF, Loc, OutlinedFn, Args);
12097 }
12098 
12099 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12100   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12101     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12102       HasEmittedDeclareTargetRegion = true;
12103 }
12104 
12105 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12106                                              const VarDecl *NativeParam,
12107                                              const VarDecl *TargetParam) const {
12108   return CGF.GetAddrOfLocalVar(NativeParam);
12109 }
12110 
12111 /// Return allocator value from expression, or return a null allocator (default
12112 /// when no allocator specified).
12113 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12114                                     const Expr *Allocator) {
12115   llvm::Value *AllocVal;
12116   if (Allocator) {
12117     AllocVal = CGF.EmitScalarExpr(Allocator);
12118     // According to the standard, the original allocator type is a enum
12119     // (integer). Convert to pointer type, if required.
12120     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12121                                         CGF.getContext().VoidPtrTy,
12122                                         Allocator->getExprLoc());
12123   } else {
12124     // If no allocator specified, it defaults to the null allocator.
12125     AllocVal = llvm::Constant::getNullValue(
12126         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12127   }
12128   return AllocVal;
12129 }
12130 
12131 /// Return the alignment from an allocate directive if present.
12132 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12133   llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12134 
12135   if (!AllocateAlignment)
12136     return nullptr;
12137 
12138   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12139 }
12140 
12141 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12142                                                    const VarDecl *VD) {
12143   if (!VD)
12144     return Address::invalid();
12145   Address UntiedAddr = Address::invalid();
12146   Address UntiedRealAddr = Address::invalid();
12147   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12148   if (It != FunctionToUntiedTaskStackMap.end()) {
12149     const UntiedLocalVarsAddressesMap &UntiedData =
12150         UntiedLocalVarsStack[It->second];
12151     auto I = UntiedData.find(VD);
12152     if (I != UntiedData.end()) {
12153       UntiedAddr = I->second.first;
12154       UntiedRealAddr = I->second.second;
12155     }
12156   }
12157   const VarDecl *CVD = VD->getCanonicalDecl();
12158   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12159     // Use the default allocation.
12160     if (!isAllocatableDecl(VD))
12161       return UntiedAddr;
12162     llvm::Value *Size;
12163     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12164     if (CVD->getType()->isVariablyModifiedType()) {
12165       Size = CGF.getTypeSize(CVD->getType());
12166       // Align the size: ((size + align - 1) / align) * align
12167       Size = CGF.Builder.CreateNUWAdd(
12168           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12169       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12170       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12171     } else {
12172       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12173       Size = CGM.getSize(Sz.alignTo(Align));
12174     }
12175     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12176     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12177     const Expr *Allocator = AA->getAllocator();
12178     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12179     llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12180     SmallVector<llvm::Value *, 4> Args;
12181     Args.push_back(ThreadID);
12182     if (Alignment)
12183       Args.push_back(Alignment);
12184     Args.push_back(Size);
12185     Args.push_back(AllocVal);
12186     llvm::omp::RuntimeFunction FnID =
12187         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12188     llvm::Value *Addr = CGF.EmitRuntimeCall(
12189         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12190         getName({CVD->getName(), ".void.addr"}));
12191     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12192         CGM.getModule(), OMPRTL___kmpc_free);
12193     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12194     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12195         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12196     if (UntiedAddr.isValid())
12197       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12198 
12199     // Cleanup action for allocate support.
12200     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12201       llvm::FunctionCallee RTLFn;
12202       SourceLocation::UIntTy LocEncoding;
12203       Address Addr;
12204       const Expr *AllocExpr;
12205 
12206     public:
12207       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12208                            SourceLocation::UIntTy LocEncoding, Address Addr,
12209                            const Expr *AllocExpr)
12210           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12211             AllocExpr(AllocExpr) {}
12212       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12213         if (!CGF.HaveInsertPoint())
12214           return;
12215         llvm::Value *Args[3];
12216         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12217             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12218         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12219             Addr.getPointer(), CGF.VoidPtrTy);
12220         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12221         Args[2] = AllocVal;
12222         CGF.EmitRuntimeCall(RTLFn, Args);
12223       }
12224     };
12225     Address VDAddr =
12226         UntiedRealAddr.isValid()
12227             ? UntiedRealAddr
12228             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12229     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12230         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12231         VDAddr, Allocator);
12232     if (UntiedRealAddr.isValid())
12233       if (auto *Region =
12234               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12235         Region->emitUntiedSwitch(CGF);
12236     return VDAddr;
12237   }
12238   return UntiedAddr;
12239 }
12240 
12241 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12242                                              const VarDecl *VD) const {
12243   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12244   if (It == FunctionToUntiedTaskStackMap.end())
12245     return false;
12246   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12247 }
12248 
12249 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12250     CodeGenModule &CGM, const OMPLoopDirective &S)
12251     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12252   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12253   if (!NeedToPush)
12254     return;
12255   NontemporalDeclsSet &DS =
12256       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12257   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12258     for (const Stmt *Ref : C->private_refs()) {
12259       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12260       const ValueDecl *VD;
12261       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12262         VD = DRE->getDecl();
12263       } else {
12264         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12265         assert((ME->isImplicitCXXThis() ||
12266                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12267                "Expected member of current class.");
12268         VD = ME->getMemberDecl();
12269       }
12270       DS.insert(VD);
12271     }
12272   }
12273 }
12274 
12275 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12276   if (!NeedToPush)
12277     return;
12278   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12279 }
12280 
12281 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12282     CodeGenFunction &CGF,
12283     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12284                           std::pair<Address, Address>> &LocalVars)
12285     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12286   if (!NeedToPush)
12287     return;
12288   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12289       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12290   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12291 }
12292 
12293 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12294   if (!NeedToPush)
12295     return;
12296   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12297 }
12298 
12299 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12300   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12301 
12302   return llvm::any_of(
12303       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12304       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12305 }
12306 
12307 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12308     const OMPExecutableDirective &S,
12309     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12310     const {
12311   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12312   // Vars in target/task regions must be excluded completely.
12313   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12314       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12315     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12316     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12317     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12318     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12319       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12320         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12321     }
12322   }
12323   // Exclude vars in private clauses.
12324   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12325     for (const Expr *Ref : C->varlists()) {
12326       if (!Ref->getType()->isScalarType())
12327         continue;
12328       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12329       if (!DRE)
12330         continue;
12331       NeedToCheckForLPCs.insert(DRE->getDecl());
12332     }
12333   }
12334   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12335     for (const Expr *Ref : C->varlists()) {
12336       if (!Ref->getType()->isScalarType())
12337         continue;
12338       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12339       if (!DRE)
12340         continue;
12341       NeedToCheckForLPCs.insert(DRE->getDecl());
12342     }
12343   }
12344   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12345     for (const Expr *Ref : C->varlists()) {
12346       if (!Ref->getType()->isScalarType())
12347         continue;
12348       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12349       if (!DRE)
12350         continue;
12351       NeedToCheckForLPCs.insert(DRE->getDecl());
12352     }
12353   }
12354   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12355     for (const Expr *Ref : C->varlists()) {
12356       if (!Ref->getType()->isScalarType())
12357         continue;
12358       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12359       if (!DRE)
12360         continue;
12361       NeedToCheckForLPCs.insert(DRE->getDecl());
12362     }
12363   }
12364   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12365     for (const Expr *Ref : C->varlists()) {
12366       if (!Ref->getType()->isScalarType())
12367         continue;
12368       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12369       if (!DRE)
12370         continue;
12371       NeedToCheckForLPCs.insert(DRE->getDecl());
12372     }
12373   }
12374   for (const Decl *VD : NeedToCheckForLPCs) {
12375     for (const LastprivateConditionalData &Data :
12376          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12377       if (Data.DeclToUniqueName.count(VD) > 0) {
12378         if (!Data.Disabled)
12379           NeedToAddForLPCsAsDisabled.insert(VD);
12380         break;
12381       }
12382     }
12383   }
12384 }
12385 
12386 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12387     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12388     : CGM(CGF.CGM),
12389       Action((CGM.getLangOpts().OpenMP >= 50 &&
12390               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12391                            [](const OMPLastprivateClause *C) {
12392                              return C->getKind() ==
12393                                     OMPC_LASTPRIVATE_conditional;
12394                            }))
12395                  ? ActionToDo::PushAsLastprivateConditional
12396                  : ActionToDo::DoNotPush) {
12397   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12398   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12399     return;
12400   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12401          "Expected a push action.");
12402   LastprivateConditionalData &Data =
12403       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12404   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12405     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12406       continue;
12407 
12408     for (const Expr *Ref : C->varlists()) {
12409       Data.DeclToUniqueName.insert(std::make_pair(
12410           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12411           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12412     }
12413   }
12414   Data.IVLVal = IVLVal;
12415   Data.Fn = CGF.CurFn;
12416 }
12417 
12418 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12419     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12420     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12421   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12422   if (CGM.getLangOpts().OpenMP < 50)
12423     return;
12424   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12425   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12426   if (!NeedToAddForLPCsAsDisabled.empty()) {
12427     Action = ActionToDo::DisableLastprivateConditional;
12428     LastprivateConditionalData &Data =
12429         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12430     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12431       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12432     Data.Fn = CGF.CurFn;
12433     Data.Disabled = true;
12434   }
12435 }
12436 
12437 CGOpenMPRuntime::LastprivateConditionalRAII
12438 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12439     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12440   return LastprivateConditionalRAII(CGF, S);
12441 }
12442 
12443 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12444   if (CGM.getLangOpts().OpenMP < 50)
12445     return;
12446   if (Action == ActionToDo::DisableLastprivateConditional) {
12447     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12448            "Expected list of disabled private vars.");
12449     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12450   }
12451   if (Action == ActionToDo::PushAsLastprivateConditional) {
12452     assert(
12453         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12454         "Expected list of lastprivate conditional vars.");
12455     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12456   }
12457 }
12458 
12459 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12460                                                         const VarDecl *VD) {
12461   ASTContext &C = CGM.getContext();
12462   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12463   if (I == LastprivateConditionalToTypes.end())
12464     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12465   QualType NewType;
12466   const FieldDecl *VDField;
12467   const FieldDecl *FiredField;
12468   LValue BaseLVal;
12469   auto VI = I->getSecond().find(VD);
12470   if (VI == I->getSecond().end()) {
12471     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12472     RD->startDefinition();
12473     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12474     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12475     RD->completeDefinition();
12476     NewType = C.getRecordType(RD);
12477     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12478     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12479     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12480   } else {
12481     NewType = std::get<0>(VI->getSecond());
12482     VDField = std::get<1>(VI->getSecond());
12483     FiredField = std::get<2>(VI->getSecond());
12484     BaseLVal = std::get<3>(VI->getSecond());
12485   }
12486   LValue FiredLVal =
12487       CGF.EmitLValueForField(BaseLVal, FiredField);
12488   CGF.EmitStoreOfScalar(
12489       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12490       FiredLVal);
12491   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12492 }
12493 
12494 namespace {
12495 /// Checks if the lastprivate conditional variable is referenced in LHS.
12496 class LastprivateConditionalRefChecker final
12497     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12498   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12499   const Expr *FoundE = nullptr;
12500   const Decl *FoundD = nullptr;
12501   StringRef UniqueDeclName;
12502   LValue IVLVal;
12503   llvm::Function *FoundFn = nullptr;
12504   SourceLocation Loc;
12505 
12506 public:
12507   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12508     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12509          llvm::reverse(LPM)) {
12510       auto It = D.DeclToUniqueName.find(E->getDecl());
12511       if (It == D.DeclToUniqueName.end())
12512         continue;
12513       if (D.Disabled)
12514         return false;
12515       FoundE = E;
12516       FoundD = E->getDecl()->getCanonicalDecl();
12517       UniqueDeclName = It->second;
12518       IVLVal = D.IVLVal;
12519       FoundFn = D.Fn;
12520       break;
12521     }
12522     return FoundE == E;
12523   }
12524   bool VisitMemberExpr(const MemberExpr *E) {
12525     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12526       return false;
12527     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12528          llvm::reverse(LPM)) {
12529       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12530       if (It == D.DeclToUniqueName.end())
12531         continue;
12532       if (D.Disabled)
12533         return false;
12534       FoundE = E;
12535       FoundD = E->getMemberDecl()->getCanonicalDecl();
12536       UniqueDeclName = It->second;
12537       IVLVal = D.IVLVal;
12538       FoundFn = D.Fn;
12539       break;
12540     }
12541     return FoundE == E;
12542   }
12543   bool VisitStmt(const Stmt *S) {
12544     for (const Stmt *Child : S->children()) {
12545       if (!Child)
12546         continue;
12547       if (const auto *E = dyn_cast<Expr>(Child))
12548         if (!E->isGLValue())
12549           continue;
12550       if (Visit(Child))
12551         return true;
12552     }
12553     return false;
12554   }
12555   explicit LastprivateConditionalRefChecker(
12556       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12557       : LPM(LPM) {}
12558   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12559   getFoundData() const {
12560     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12561   }
12562 };
12563 } // namespace
12564 
12565 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12566                                                        LValue IVLVal,
12567                                                        StringRef UniqueDeclName,
12568                                                        LValue LVal,
12569                                                        SourceLocation Loc) {
12570   // Last updated loop counter for the lastprivate conditional var.
12571   // int<xx> last_iv = 0;
12572   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12573   llvm::Constant *LastIV =
12574       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12575   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12576       IVLVal.getAlignment().getAsAlign());
12577   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12578 
12579   // Last value of the lastprivate conditional.
12580   // decltype(priv_a) last_a;
12581   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12582       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12583   Last->setAlignment(LVal.getAlignment().getAsAlign());
12584   LValue LastLVal = CGF.MakeAddrLValue(
12585       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12586 
12587   // Global loop counter. Required to handle inner parallel-for regions.
12588   // iv
12589   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12590 
12591   // #pragma omp critical(a)
12592   // if (last_iv <= iv) {
12593   //   last_iv = iv;
12594   //   last_a = priv_a;
12595   // }
12596   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12597                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12598     Action.Enter(CGF);
12599     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12600     // (last_iv <= iv) ? Check if the variable is updated and store new
12601     // value in global var.
12602     llvm::Value *CmpRes;
12603     if (IVLVal.getType()->isSignedIntegerType()) {
12604       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12605     } else {
12606       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12607              "Loop iteration variable must be integer.");
12608       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12609     }
12610     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12611     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12612     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12613     // {
12614     CGF.EmitBlock(ThenBB);
12615 
12616     //   last_iv = iv;
12617     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12618 
12619     //   last_a = priv_a;
12620     switch (CGF.getEvaluationKind(LVal.getType())) {
12621     case TEK_Scalar: {
12622       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12623       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12624       break;
12625     }
12626     case TEK_Complex: {
12627       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12628       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12629       break;
12630     }
12631     case TEK_Aggregate:
12632       llvm_unreachable(
12633           "Aggregates are not supported in lastprivate conditional.");
12634     }
12635     // }
12636     CGF.EmitBranch(ExitBB);
12637     // There is no need to emit line number for unconditional branch.
12638     (void)ApplyDebugLocation::CreateEmpty(CGF);
12639     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12640   };
12641 
12642   if (CGM.getLangOpts().OpenMPSimd) {
12643     // Do not emit as a critical region as no parallel region could be emitted.
12644     RegionCodeGenTy ThenRCG(CodeGen);
12645     ThenRCG(CGF);
12646   } else {
12647     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12648   }
12649 }
12650 
12651 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12652                                                          const Expr *LHS) {
12653   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12654     return;
12655   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12656   if (!Checker.Visit(LHS))
12657     return;
12658   const Expr *FoundE;
12659   const Decl *FoundD;
12660   StringRef UniqueDeclName;
12661   LValue IVLVal;
12662   llvm::Function *FoundFn;
12663   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12664       Checker.getFoundData();
12665   if (FoundFn != CGF.CurFn) {
12666     // Special codegen for inner parallel regions.
12667     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12668     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12669     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12670            "Lastprivate conditional is not found in outer region.");
12671     QualType StructTy = std::get<0>(It->getSecond());
12672     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12673     LValue PrivLVal = CGF.EmitLValue(FoundE);
12674     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12675         PrivLVal.getAddress(CGF),
12676         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12677         CGF.ConvertTypeForMem(StructTy));
12678     LValue BaseLVal =
12679         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12680     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12681     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12682                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12683                         FiredLVal, llvm::AtomicOrdering::Unordered,
12684                         /*IsVolatile=*/true, /*isInit=*/false);
12685     return;
12686   }
12687 
12688   // Private address of the lastprivate conditional in the current context.
12689   // priv_a
12690   LValue LVal = CGF.EmitLValue(FoundE);
12691   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12692                                    FoundE->getExprLoc());
12693 }
12694 
12695 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12696     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12697     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12698   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12699     return;
12700   auto Range = llvm::reverse(LastprivateConditionalStack);
12701   auto It = llvm::find_if(
12702       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12703   if (It == Range.end() || It->Fn != CGF.CurFn)
12704     return;
12705   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12706   assert(LPCI != LastprivateConditionalToTypes.end() &&
12707          "Lastprivates must be registered already.");
12708   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12709   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12710   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12711   for (const auto &Pair : It->DeclToUniqueName) {
12712     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12713     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12714       continue;
12715     auto I = LPCI->getSecond().find(Pair.first);
12716     assert(I != LPCI->getSecond().end() &&
12717            "Lastprivate must be rehistered already.");
12718     // bool Cmp = priv_a.Fired != 0;
12719     LValue BaseLVal = std::get<3>(I->getSecond());
12720     LValue FiredLVal =
12721         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12722     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12723     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12724     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12725     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12726     // if (Cmp) {
12727     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12728     CGF.EmitBlock(ThenBB);
12729     Address Addr = CGF.GetAddrOfLocalVar(VD);
12730     LValue LVal;
12731     if (VD->getType()->isReferenceType())
12732       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12733                                            AlignmentSource::Decl);
12734     else
12735       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12736                                 AlignmentSource::Decl);
12737     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12738                                      D.getBeginLoc());
12739     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12740     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12741     // }
12742   }
12743 }
12744 
12745 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12746     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12747     SourceLocation Loc) {
12748   if (CGF.getLangOpts().OpenMP < 50)
12749     return;
12750   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12751   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12752          "Unknown lastprivate conditional variable.");
12753   StringRef UniqueName = It->second;
12754   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12755   // The variable was not updated in the region - exit.
12756   if (!GV)
12757     return;
12758   LValue LPLVal = CGF.MakeAddrLValue(
12759       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12760       PrivLVal.getType().getNonReferenceType());
12761   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12762   CGF.EmitStoreOfScalar(Res, PrivLVal);
12763 }
12764 
12765 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12766     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12767     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12768   llvm_unreachable("Not supported in SIMD-only mode");
12769 }
12770 
12771 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12772     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12773     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12774   llvm_unreachable("Not supported in SIMD-only mode");
12775 }
12776 
12777 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12778     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12779     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12780     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12781     bool Tied, unsigned &NumberOfParts) {
12782   llvm_unreachable("Not supported in SIMD-only mode");
12783 }
12784 
12785 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12786                                            SourceLocation Loc,
12787                                            llvm::Function *OutlinedFn,
12788                                            ArrayRef<llvm::Value *> CapturedVars,
12789                                            const Expr *IfCond,
12790                                            llvm::Value *NumThreads) {
12791   llvm_unreachable("Not supported in SIMD-only mode");
12792 }
12793 
12794 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12795     CodeGenFunction &CGF, StringRef CriticalName,
12796     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12797     const Expr *Hint) {
12798   llvm_unreachable("Not supported in SIMD-only mode");
12799 }
12800 
12801 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12802                                            const RegionCodeGenTy &MasterOpGen,
12803                                            SourceLocation Loc) {
12804   llvm_unreachable("Not supported in SIMD-only mode");
12805 }
12806 
12807 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12808                                            const RegionCodeGenTy &MasterOpGen,
12809                                            SourceLocation Loc,
12810                                            const Expr *Filter) {
12811   llvm_unreachable("Not supported in SIMD-only mode");
12812 }
12813 
12814 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12815                                             SourceLocation Loc) {
12816   llvm_unreachable("Not supported in SIMD-only mode");
12817 }
12818 
12819 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12820     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12821     SourceLocation Loc) {
12822   llvm_unreachable("Not supported in SIMD-only mode");
12823 }
12824 
12825 void CGOpenMPSIMDRuntime::emitSingleRegion(
12826     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12827     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12828     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12829     ArrayRef<const Expr *> AssignmentOps) {
12830   llvm_unreachable("Not supported in SIMD-only mode");
12831 }
12832 
12833 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12834                                             const RegionCodeGenTy &OrderedOpGen,
12835                                             SourceLocation Loc,
12836                                             bool IsThreads) {
12837   llvm_unreachable("Not supported in SIMD-only mode");
12838 }
12839 
12840 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12841                                           SourceLocation Loc,
12842                                           OpenMPDirectiveKind Kind,
12843                                           bool EmitChecks,
12844                                           bool ForceSimpleCall) {
12845   llvm_unreachable("Not supported in SIMD-only mode");
12846 }
12847 
12848 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12849     CodeGenFunction &CGF, SourceLocation Loc,
12850     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12851     bool Ordered, const DispatchRTInput &DispatchValues) {
12852   llvm_unreachable("Not supported in SIMD-only mode");
12853 }
12854 
12855 void CGOpenMPSIMDRuntime::emitForStaticInit(
12856     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12857     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12858   llvm_unreachable("Not supported in SIMD-only mode");
12859 }
12860 
12861 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12862     CodeGenFunction &CGF, SourceLocation Loc,
12863     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12864   llvm_unreachable("Not supported in SIMD-only mode");
12865 }
12866 
12867 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12868                                                      SourceLocation Loc,
12869                                                      unsigned IVSize,
12870                                                      bool IVSigned) {
12871   llvm_unreachable("Not supported in SIMD-only mode");
12872 }
12873 
12874 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12875                                               SourceLocation Loc,
12876                                               OpenMPDirectiveKind DKind) {
12877   llvm_unreachable("Not supported in SIMD-only mode");
12878 }
12879 
12880 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12881                                               SourceLocation Loc,
12882                                               unsigned IVSize, bool IVSigned,
12883                                               Address IL, Address LB,
12884                                               Address UB, Address ST) {
12885   llvm_unreachable("Not supported in SIMD-only mode");
12886 }
12887 
12888 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12889                                                llvm::Value *NumThreads,
12890                                                SourceLocation Loc) {
12891   llvm_unreachable("Not supported in SIMD-only mode");
12892 }
12893 
12894 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12895                                              ProcBindKind ProcBind,
12896                                              SourceLocation Loc) {
12897   llvm_unreachable("Not supported in SIMD-only mode");
12898 }
12899 
12900 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12901                                                     const VarDecl *VD,
12902                                                     Address VDAddr,
12903                                                     SourceLocation Loc) {
12904   llvm_unreachable("Not supported in SIMD-only mode");
12905 }
12906 
12907 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12908     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12909     CodeGenFunction *CGF) {
12910   llvm_unreachable("Not supported in SIMD-only mode");
12911 }
12912 
12913 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12914     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12915   llvm_unreachable("Not supported in SIMD-only mode");
12916 }
12917 
12918 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12919                                     ArrayRef<const Expr *> Vars,
12920                                     SourceLocation Loc,
12921                                     llvm::AtomicOrdering AO) {
12922   llvm_unreachable("Not supported in SIMD-only mode");
12923 }
12924 
12925 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12926                                        const OMPExecutableDirective &D,
12927                                        llvm::Function *TaskFunction,
12928                                        QualType SharedsTy, Address Shareds,
12929                                        const Expr *IfCond,
12930                                        const OMPTaskDataTy &Data) {
12931   llvm_unreachable("Not supported in SIMD-only mode");
12932 }
12933 
12934 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12935     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12936     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12937     const Expr *IfCond, const OMPTaskDataTy &Data) {
12938   llvm_unreachable("Not supported in SIMD-only mode");
12939 }
12940 
12941 void CGOpenMPSIMDRuntime::emitReduction(
12942     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12943     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12944     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12945   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12946   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12947                                  ReductionOps, Options);
12948 }
12949 
12950 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12951     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12952     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12953   llvm_unreachable("Not supported in SIMD-only mode");
12954 }
12955 
12956 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12957                                                 SourceLocation Loc,
12958                                                 bool IsWorksharingReduction) {
12959   llvm_unreachable("Not supported in SIMD-only mode");
12960 }
12961 
12962 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12963                                                   SourceLocation Loc,
12964                                                   ReductionCodeGen &RCG,
12965                                                   unsigned N) {
12966   llvm_unreachable("Not supported in SIMD-only mode");
12967 }
12968 
12969 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12970                                                   SourceLocation Loc,
12971                                                   llvm::Value *ReductionsPtr,
12972                                                   LValue SharedLVal) {
12973   llvm_unreachable("Not supported in SIMD-only mode");
12974 }
12975 
12976 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12977                                            SourceLocation Loc,
12978                                            const OMPTaskDataTy &Data) {
12979   llvm_unreachable("Not supported in SIMD-only mode");
12980 }
12981 
12982 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12983     CodeGenFunction &CGF, SourceLocation Loc,
12984     OpenMPDirectiveKind CancelRegion) {
12985   llvm_unreachable("Not supported in SIMD-only mode");
12986 }
12987 
12988 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12989                                          SourceLocation Loc, const Expr *IfCond,
12990                                          OpenMPDirectiveKind CancelRegion) {
12991   llvm_unreachable("Not supported in SIMD-only mode");
12992 }
12993 
12994 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12995     const OMPExecutableDirective &D, StringRef ParentName,
12996     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12997     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12998   llvm_unreachable("Not supported in SIMD-only mode");
12999 }
13000 
13001 void CGOpenMPSIMDRuntime::emitTargetCall(
13002     CodeGenFunction &CGF, const OMPExecutableDirective &D,
13003     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13004     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13005     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13006                                      const OMPLoopDirective &D)>
13007         SizeEmitter) {
13008   llvm_unreachable("Not supported in SIMD-only mode");
13009 }
13010 
13011 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13012   llvm_unreachable("Not supported in SIMD-only mode");
13013 }
13014 
13015 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13016   llvm_unreachable("Not supported in SIMD-only mode");
13017 }
13018 
13019 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13020   return false;
13021 }
13022 
13023 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13024                                         const OMPExecutableDirective &D,
13025                                         SourceLocation Loc,
13026                                         llvm::Function *OutlinedFn,
13027                                         ArrayRef<llvm::Value *> CapturedVars) {
13028   llvm_unreachable("Not supported in SIMD-only mode");
13029 }
13030 
13031 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13032                                              const Expr *NumTeams,
13033                                              const Expr *ThreadLimit,
13034                                              SourceLocation Loc) {
13035   llvm_unreachable("Not supported in SIMD-only mode");
13036 }
13037 
13038 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13039     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13040     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13041   llvm_unreachable("Not supported in SIMD-only mode");
13042 }
13043 
13044 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13045     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13046     const Expr *Device) {
13047   llvm_unreachable("Not supported in SIMD-only mode");
13048 }
13049 
13050 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13051                                            const OMPLoopDirective &D,
13052                                            ArrayRef<Expr *> NumIterations) {
13053   llvm_unreachable("Not supported in SIMD-only mode");
13054 }
13055 
13056 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13057                                               const OMPDependClause *C) {
13058   llvm_unreachable("Not supported in SIMD-only mode");
13059 }
13060 
13061 const VarDecl *
13062 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13063                                         const VarDecl *NativeParam) const {
13064   llvm_unreachable("Not supported in SIMD-only mode");
13065 }
13066 
13067 Address
13068 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13069                                          const VarDecl *NativeParam,
13070                                          const VarDecl *TargetParam) const {
13071   llvm_unreachable("Not supported in SIMD-only mode");
13072 }
13073