//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
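    // A rough sketch of the code emitted for an untied task body with two
    // scheduling parts (purely illustrative; cases are added lazily below):
    //
    //   switch (*partid) {           // emitted by Enter()
    //   default: goto .untied.done.; // finished, exit the outlined function
    //   case 0: /* part 0 */ *partid = 1; <re-enqueue task>; return;
    //   case 1: /* part 1 */ ...
    //   }
    //
    // emitUntiedSwitch() stores the next case number into the part id, runs
    // UntiedCodeGen (which re-enqueues the task), and adds the new case.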
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
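// Note that the implicit-barrier variants all carry the OMP_IDENT_BARRIER_IMPL
// bit (0x40) and distinguish the construct in the bits above it; e.g.
// OMP_IDENT_BARRIER_IMPL_SECTIONS == OMP_IDENT_BARRIER_IMPL | 0x80 and
// OMP_IDENT_BARRIER_IMPL_SINGLE == OMP_IDENT_BARRIER_IMPL | 0x100.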

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used when the device was not explicitly specified; per the
  /// spec, the runtime should then take it from environment variables.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes the ident_t structure that encodes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
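// For reference, Clang's default psource string, emitted when no usable
// source location is available, has the form ";unknown;unknown;0;0;;".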

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
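// The modifier bits are OR'ed into the base schedule value when the runtime
// call is emitted, so, for example, 'schedule(nonmonotonic: dynamic)' is
// encoded roughly as (OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic).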

/// A basic class for pre/post-actions used in advanced codegen sequences for
/// OpenMP regions.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the reduction operation is a call to a UDR combiner and, if so,
/// return the UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
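  // A sketch of the emitted control flow:
  //
  //   entry:
  //     isempty = icmp eq dest.begin, dest.end
  //     br isempty, done, body
  //   body:
  //     dest.cur = phi [dest.begin, entry], [dest.next, body]
  //     <initialize element at dest.cur (reading src.cur if DRD is used)>
  //     dest.next = gep dest.cur, 1
  //     br (dest.next == dest.end), done, body
  //   done: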
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

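// Repeatedly load through pointer/reference levels of \p BaseLV until the
// value has the element type \p ElTy; e.g. for a base of type 'T *&' this
// loads the reference, then the pointer, and returns an lvalue for the 'T'
// object itself.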
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

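// Build a chain of temporaries mirroring the pointer/reference structure of
// \p BaseTy, so that loading through the returned address level by level
// ultimately yields \p Addr cast to the element type. A sketch for a base of
// type 'T **': tmp1 holds Addr as 'T *', tmp0 holds the address of tmp1, and
// tmp0 is returned.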
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

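// For reductions over array sections or subscripts (e.g.
// 'reduction(+ : a[1:2])'), the private copy covers only the reduced
// elements, while later expressions still index off the base variable. The
// adjustment below computes the offset of the base from the start of the
// shared section and applies the same offset to the private copy, rebasing
// the result via castToBase.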
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false,
                                     hasRequiresUnifiedSharedMemory(),
                                     CGM.getLangOpts().OpenMPOffloadMandatory);
  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  OMPBuilder.setConfig(Config);
  OffloadEntriesInfoManager.setConfig(Config);
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
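  // For example (a sketch, assuming a user-defined reduction like
  //   #pragma omp declare reduction(foo : T : omp_out += omp_in)
  // ), the combiner emitted here is morally equivalent to:
  //   static void .omp_combiner.(T *restrict omp_out, T *restrict omp_in) {
  //     *omp_out += *omp_in;
  //   }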
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will be no need
    // to push & pop a FinalizationInfo object.
1203     // The FiniCB will still be needed but at the point where the
1204     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1205     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1206       assert(IP.getBlock()->end() == IP.getPoint() &&
1207              "Clang CG should cause non-terminated block!");
1208       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1209       CGF.Builder.restoreIP(IP);
1210       CodeGenFunction::JumpDest Dest =
1211           CGF.getOMPCancelDestination(OMPD_parallel);
1212       CGF.EmitBranchThroughCleanup(Dest);
1213     };
1214 
1215     // TODO: Remove this once we emit parallel regions through the
1216     //       OpenMPIRBuilder as it can do this setup internally.
1217     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1218     OMPBuilder->pushFinalizationCB(std::move(FI));
1219   }
1220   ~PushAndPopStackRAII() {
1221     if (OMPBuilder)
1222       OMPBuilder->popFinalizationCB();
1223   }
1224   llvm::OpenMPIRBuilder *OMPBuilder;
1225 };
1226 } // namespace
1227 
1228 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1229     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1230     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1231     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1232   assert(ThreadIDVar->getType()->isPointerType() &&
1233          "thread id variable must be of type kmp_int32 *");
1234   CodeGenFunction CGF(CGM, true);
1235   bool HasCancel = false;
1236   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1237     HasCancel = OPD->hasCancel();
1238   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1239     HasCancel = OPD->hasCancel();
1240   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1241     HasCancel = OPSD->hasCancel();
1242   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1243     HasCancel = OPFD->hasCancel();
1244   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1245     HasCancel = OPFD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD =
1249                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1250     HasCancel = OPFD->hasCancel();
1251   else if (const auto *OPFD =
1252                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1253     HasCancel = OPFD->hasCancel();
1254 
1255   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1256   //       parallel region to make cancellation barriers work properly.
1257   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1258   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1259   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1260                                     HasCancel, OutlinedHelperName);
1261   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1262   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1263 }
1264 
1265 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1266     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1267     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1268   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1269   return emitParallelOrTeamsOutlinedFunction(
1270       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1271 }
1272 
1273 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1274     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1275     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1276   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1277   return emitParallelOrTeamsOutlinedFunction(
1278       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1279 }
1280 
1281 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1282     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1283     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1285     bool Tied, unsigned &NumberOfParts) {
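  // Code generation used by the untied-task action below; it re-enqueues the
  // task by calling
  //   kmp_int32 __kmpc_omp_task(ident_t *loc, kmp_int32 gtid,
  //                             kmp_task_t *new_task);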
1286   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1287                                               PrePostActionTy &) {
1288     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1289     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1290     llvm::Value *TaskArgs[] = {
1291         UpLoc, ThreadID,
1292         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1293                                     TaskTVar->getType()->castAs<PointerType>())
1294             .getPointer(CGF)};
1295     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1296                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1297                         TaskArgs);
1298   };
1299   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1300                                                             UntiedCodeGen);
1301   CodeGen.setAction(Action);
1302   assert(!ThreadIDVar->getType()->isPointerType() &&
1303          "thread id variable must be of type kmp_int32 for tasks");
1304   const OpenMPDirectiveKind Region =
1305       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1306                                                       : OMPD_task;
1307   const CapturedStmt *CS = D.getCapturedStmt(Region);
1308   bool HasCancel = false;
1309   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1310     HasCancel = TD->hasCancel();
1311   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1312     HasCancel = TD->hasCancel();
1313   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1314     HasCancel = TD->hasCancel();
1315   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1316     HasCancel = TD->hasCancel();
1317 
1318   CodeGenFunction CGF(CGM, true);
1319   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1320                                         InnermostKind, HasCancel, Action);
1321   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1322   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1323   if (!Tied)
1324     NumberOfParts = Action.getNumberOfParts();
1325   return Res;
1326 }
1327 
1328 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1329                                              bool AtCurrentPoint) {
1330   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1331   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1332 
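  // Create a dead bitcast of an undef value as a placeholder instruction;
  // runtime "service" calls, such as the __kmpc_global_thread_num call, are
  // later emitted immediately before it.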
1333   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1334   if (AtCurrentPoint) {
1335     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1336         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1337   } else {
1338     Elem.second.ServiceInsertPt =
1339         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1340     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1341   }
1342 }
1343 
1344 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1345   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1346   if (Elem.second.ServiceInsertPt) {
1347     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1348     Elem.second.ServiceInsertPt = nullptr;
1349     Ptr->eraseFromParent();
1350   }
1351 }
1352 
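/// Build the location string embedded in an ident_t; it has the form
/// ";file;function;line;column;;", e.g. ";test.c;foo;4;1;;".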
1353 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1354                                                   SourceLocation Loc,
1355                                                   SmallString<128> &Buffer) {
1356   llvm::raw_svector_ostream OS(Buffer);
1357   // Build debug location
1358   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1359   OS << ";" << PLoc.getFilename() << ";";
1360   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1361     OS << FD->getQualifiedNameAsString();
1362   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1363   return OS.str();
1364 }
1365 
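/// Create (or reuse) an ident_t value describing \p Loc. If no debug info is
/// requested (and \p EmitLoc is not set), or if \p Loc is invalid, a default
/// location string is used instead of the file/function/line/column tuple.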
1366 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1367                                                  SourceLocation Loc,
1368                                                  unsigned Flags, bool EmitLoc) {
1369   uint32_t SrcLocStrSize;
1370   llvm::Constant *SrcLocStr;
1371   if ((!EmitLoc &&
1372        CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) ||
1373       Loc.isInvalid()) {
1374     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1375   } else {
1376     std::string FunctionName;
1377     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1378       FunctionName = FD->getQualifiedNameAsString();
1379     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1380     const char *FileName = PLoc.getFilename();
1381     unsigned Line = PLoc.getLine();
1382     unsigned Column = PLoc.getColumn();
1383     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1384                                                 Column, SrcLocStrSize);
1385   }
1386   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1387   return OMPBuilder.getOrCreateIdent(
1388       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1389 }
1390 
1391 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1392                                           SourceLocation Loc) {
1393   assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
1396   if (CGM.getLangOpts().OpenMPIRBuilder) {
1397     SmallString<128> Buffer;
1398     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1399     uint32_t SrcLocStrSize;
1400     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1401         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1402     return OMPBuilder.getOrCreateThreadID(
1403         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1404   }
1405 
1406   llvm::Value *ThreadID = nullptr;
1407   // Check whether we've already cached a load of the thread id in this
1408   // function.
1409   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1410   if (I != OpenMPLocThreadIDMap.end()) {
1411     ThreadID = I->second.ThreadID;
1412     if (ThreadID != nullptr)
1413       return ThreadID;
1414   }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
1416   if (auto *OMPRegionInfo =
1417           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1418     if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
1420       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1421       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1422       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1423           !CGF.getLangOpts().CXXExceptions ||
1424           CGF.Builder.GetInsertBlock() == TopBlock ||
1425           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1426           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1427               TopBlock ||
1428           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1429               CGF.Builder.GetInsertBlock()) {
1430         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
1433         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1434           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1435           Elem.second.ThreadID = ThreadID;
1436         }
1437         return ThreadID;
1438       }
1439     }
1440   }
1441 
  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
1446   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447   if (!Elem.second.ServiceInsertPt)
1448     setLocThreadIdInsertPt(CGF);
1449   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1450   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1451   llvm::CallInst *Call = CGF.Builder.CreateCall(
1452       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1453                                             OMPRTL___kmpc_global_thread_num),
1454       emitUpdateLocation(CGF, Loc));
1455   Call->setCallingConv(CGF.getRuntimeCC());
1456   Elem.second.ThreadID = Call;
1457   return Call;
1458 }
1459 
1460 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1461   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1462   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1463     clearLocThreadIdInsertPt(CGF);
1464     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1465   }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
1477   LastprivateConditionalToTypes.erase(CGF.CurFn);
1478   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1479 }
1480 
1481 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1482   return OMPBuilder.IdentPtr;
1483 }
1484 
1485 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1486   if (!Kmpc_MicroTy) {
1487     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1488     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1489                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1490     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1491   }
1492   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1493 }
1494 
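/// Builds a declaration for one of the eight static-init entry points chosen
/// below, e.g.:
///   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
///                                 kmp_int32 schedtype, kmp_int32 *p_lastiter,
///                                 kmp_int32 *p_lower, kmp_int32 *p_upper,
///                                 kmp_int32 *p_stride, kmp_int32 incr,
///                                 kmp_int32 chunk);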
1495 llvm::FunctionCallee
1496 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1497                                              bool IsGPUDistribute) {
1498   assert((IVSize == 32 || IVSize == 64) &&
1499          "IV size is not compatible with the omp runtime");
1500   StringRef Name;
1501   if (IsGPUDistribute)
1502     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1503                                     : "__kmpc_distribute_static_init_4u")
1504                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1505                                     : "__kmpc_distribute_static_init_8u");
1506   else
1507     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1508                                     : "__kmpc_for_static_init_4u")
1509                         : (IVSigned ? "__kmpc_for_static_init_8"
1510                                     : "__kmpc_for_static_init_8u");
1511 
1512   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1513   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1514   llvm::Type *TypeParams[] = {
1515     getIdentTyPointerTy(),                     // loc
1516     CGM.Int32Ty,                               // tid
1517     CGM.Int32Ty,                               // schedtype
1518     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1519     PtrTy,                                     // p_lower
1520     PtrTy,                                     // p_upper
1521     PtrTy,                                     // p_stride
1522     ITy,                                       // incr
1523     ITy                                        // chunk
1524   };
1525   auto *FnTy =
1526       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1527   return CGM.CreateRuntimeFunction(FnTy, Name);
1528 }
1529 
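/// Builds a declaration for one of the __kmpc_dispatch_init_* entry points,
/// e.g.:
///   void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 tid,
///                               kmp_int32 schedtype, kmp_int32 lower,
///                               kmp_int32 upper, kmp_int32 stride,
///                               kmp_int32 chunk);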
1530 llvm::FunctionCallee
1531 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1532   assert((IVSize == 32 || IVSize == 64) &&
1533          "IV size is not compatible with the omp runtime");
1534   StringRef Name =
1535       IVSize == 32
1536           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1537           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1538   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1539   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1540                                CGM.Int32Ty,           // tid
1541                                CGM.Int32Ty,           // schedtype
1542                                ITy,                   // lower
1543                                ITy,                   // upper
1544                                ITy,                   // stride
1545                                ITy                    // chunk
1546   };
1547   auto *FnTy =
1548       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1549   return CGM.CreateRuntimeFunction(FnTy, Name);
1550 }
1551 
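/// Builds a declaration for one of the __kmpc_dispatch_fini_* entry points,
/// e.g.:
///   void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 tid);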
1552 llvm::FunctionCallee
1553 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1554   assert((IVSize == 32 || IVSize == 64) &&
1555          "IV size is not compatible with the omp runtime");
1556   StringRef Name =
1557       IVSize == 32
1558           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1559           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1560   llvm::Type *TypeParams[] = {
1561       getIdentTyPointerTy(), // loc
1562       CGM.Int32Ty,           // tid
1563   };
1564   auto *FnTy =
1565       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1566   return CGM.CreateRuntimeFunction(FnTy, Name);
1567 }
1568 
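/// Builds a declaration for one of the __kmpc_dispatch_next_* entry points,
/// e.g.:
///   kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
///                                    kmp_int32 *p_lastiter,
///                                    kmp_int32 *p_lower, kmp_int32 *p_upper,
///                                    kmp_int32 *p_stride);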
1569 llvm::FunctionCallee
1570 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1571   assert((IVSize == 32 || IVSize == 64) &&
1572          "IV size is not compatible with the omp runtime");
1573   StringRef Name =
1574       IVSize == 32
1575           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1576           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1577   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1578   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1579   llvm::Type *TypeParams[] = {
1580     getIdentTyPointerTy(),                     // loc
1581     CGM.Int32Ty,                               // tid
1582     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1583     PtrTy,                                     // p_lower
1584     PtrTy,                                     // p_upper
1585     PtrTy                                      // p_stride
1586   };
1587   auto *FnTy =
1588       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1589   return CGM.CreateRuntimeFunction(FnTy, Name);
1590 }
1591 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as the line number associated
/// with the relevant entry source location.
1595 static llvm::TargetRegionEntryInfo
1596 getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1597                          StringRef ParentName = "") {
1598   SourceManager &SM = C.getSourceManager();
1599 
  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).
  assert(Loc.isValid() && "Source location is expected to always be valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to always be valid.");
1607 
1608   llvm::sys::fs::UniqueID ID;
1609   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1610     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to always be valid.");
1612     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1613       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1614           << PLoc.getFilename() << EC.message();
1615   }
1616 
1617   return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
1618                                      PLoc.getLine());
1619 }
1620 
1621 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1622   if (CGM.getLangOpts().OpenMPSimd)
1623     return Address::invalid();
1624   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1625       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1626   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1627               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1628                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1629                HasRequiresUnifiedSharedMemory))) {
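    // The reference pointer is named "<mangled-name>[_<file-id>]" followed by
    // "_decl_tgt_ref_ptr"; the file-ID suffix keeps internal symbols unique
    // across translation units.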
1630     SmallString<64> PtrName;
1631     {
1632       llvm::raw_svector_ostream OS(PtrName);
1633       OS << CGM.getMangledName(GlobalDecl(VD));
1634       if (!VD->isExternallyVisible()) {
1635         auto EntryInfo = getTargetEntryUniqueInfo(
1636             CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc());
1637         OS << llvm::format("_%x", EntryInfo.FileID);
1638       }
1639       OS << "_decl_tgt_ref_ptr";
1640     }
1641     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1642     QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1643     llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1644     if (!Ptr) {
1645       Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1646 
1647       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1648       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1649 
1650       if (!CGM.getLangOpts().OpenMPIsDevice)
1651         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1652       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1653     }
1654     return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1655   }
1656   return Address::invalid();
1657 }
1658 
1659 llvm::Constant *
1660 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1661   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1662          !CGM.getContext().getTargetInfo().isTLSSupported());
1663   // Lookup the entry, lazily creating it if necessary.
1664   std::string Suffix = getName({"cache", ""});
1665   return OMPBuilder.getOrCreateInternalVariable(
1666       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1667 }
1668 
1669 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1670                                                 const VarDecl *VD,
1671                                                 Address VDAddr,
1672                                                 SourceLocation Loc) {
1673   if (CGM.getLangOpts().OpenMPUseTLS &&
1674       CGM.getContext().getTargetInfo().isTLSSupported())
1675     return VDAddr;
1676 
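  // Build a call to the cached threadprivate lookup, roughly:
  //   void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 gtid,
  //                                     void *data, size_t size,
  //                                     void ***cache);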
1677   llvm::Type *VarTy = VDAddr.getElementType();
1678   llvm::Value *Args[] = {
1679       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1680       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1681       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1682       getOrCreateThreadPrivateCache(VD)};
1683   return Address(
1684       CGF.EmitRuntimeCall(
1685           OMPBuilder.getOrCreateRuntimeFunction(
1686               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1687           Args),
1688       CGF.Int8Ty, VDAddr.getAlignment());
1689 }
1690 
1691 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1692     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1693     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
  // runtime library.
1696   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1697   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1698                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1699                       OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
1702   llvm::Value *Args[] = {
1703       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1704       Ctor, CopyCtor, Dtor};
1705   CGF.EmitRuntimeCall(
1706       OMPBuilder.getOrCreateRuntimeFunction(
1707           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1708       Args);
1709 }
1710 
1711 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1712     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1713     bool PerformInit, CodeGenFunction *CGF) {
1714   if (CGM.getLangOpts().OpenMPUseTLS &&
1715       CGM.getContext().getTargetInfo().isTLSSupported())
1716     return nullptr;
1717 
1718   VD = VD->getDefinition(CGM.getContext());
1719   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1720     QualType ASTTy = VD->getType();
1721 
1722     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1723     const Expr *Init = VD->getAnyInitializer();
1724     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
1727       CodeGenFunction CtorCGF(CGM);
1728       FunctionArgList Args;
1729       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1730                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1731                             ImplicitParamDecl::Other);
1732       Args.push_back(&Dst);
1733 
1734       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1735           CGM.getContext().VoidPtrTy, Args);
1736       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1737       std::string Name = getName({"__kmpc_global_ctor_", ""});
1738       llvm::Function *Fn =
1739           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1740       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1741                             Args, Loc, Loc);
1742       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1743           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1744           CGM.getContext().VoidPtrTy, Dst.getLocation());
1745       Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
1746       Arg = CtorCGF.Builder.CreateElementBitCast(
1747           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1748       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1749                                /*IsInitializer=*/true);
1750       ArgVal = CtorCGF.EmitLoadOfScalar(
1751           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1752           CGM.getContext().VoidPtrTy, Dst.getLocation());
1753       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1754       CtorCGF.FinishFunction();
1755       Ctor = Fn;
1756     }
1757     if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
1760       CodeGenFunction DtorCGF(CGM);
1761       FunctionArgList Args;
1762       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1763                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1764                             ImplicitParamDecl::Other);
1765       Args.push_back(&Dst);
1766 
1767       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1768           CGM.getContext().VoidTy, Args);
1769       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1770       std::string Name = getName({"__kmpc_global_dtor_", ""});
1771       llvm::Function *Fn =
1772           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1773       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1774       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1775                             Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
1777       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1778       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1779           DtorCGF.GetAddrOfLocalVar(&Dst),
1780           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1781       DtorCGF.emitDestroy(
1782           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1783           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1784           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1785       DtorCGF.FinishFunction();
1786       Dtor = Fn;
1787     }
    // Do not emit the init function if it is not required.
1789     if (!Ctor && !Dtor)
1790       return nullptr;
1791 
1792     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1793     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1794                                                /*isVarArg=*/false)
1795                            ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by the runtime, which currently requires that
    // this parameter is always NULL; otherwise it fires an assertion.
1799     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1800     if (Ctor == nullptr) {
1801       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1802                                              /*isVarArg=*/false)
1803                          ->getPointerTo();
1804       Ctor = llvm::Constant::getNullValue(CtorTy);
1805     }
1806     if (Dtor == nullptr) {
1807       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1808                                              /*isVarArg=*/false)
1809                          ->getPointerTo();
1810       Dtor = llvm::Constant::getNullValue(DtorTy);
1811     }
1812     if (!CGF) {
1813       auto *InitFunctionTy =
1814           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1815       std::string Name = getName({"__omp_threadprivate_init_", ""});
1816       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1817           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1818       CodeGenFunction InitCGF(CGM);
1819       FunctionArgList ArgList;
1820       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1821                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1822                             Loc, Loc);
1823       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1824       InitCGF.FinishFunction();
1825       return InitFunction;
1826     }
1827     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1828   }
1829   return nullptr;
1830 }
1831 
1832 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1833                                                      llvm::GlobalVariable *Addr,
1834                                                      bool PerformInit) {
1835   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1836       !CGM.getLangOpts().OpenMPIsDevice)
1837     return false;
1838   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1839       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1840   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1841       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1842         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1843        HasRequiresUnifiedSharedMemory))
1844     return CGM.getLangOpts().OpenMPIsDevice;
1845   VD = VD->getDefinition(CGM.getContext());
1846   assert(VD && "Unknown VarDecl");
1847 
1848   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1849     return CGM.getLangOpts().OpenMPIsDevice;
1850 
1851   QualType ASTTy = VD->getType();
1852   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1853 
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know does not
  // conflict with any target region.
1857   auto EntryInfo =
1858       getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
1859   SmallString<128> Buffer, Out;
1860   OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1861 
1862   const Expr *Init = VD->getAnyInitializer();
1863   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1864     llvm::Constant *Ctor;
1865     llvm::Constant *ID;
1866     if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
1869       CodeGenFunction CtorCGF(CGM);
1870 
1871       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1872       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1873       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1874           FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1875           llvm::GlobalValue::WeakODRLinkage);
1876       Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1877       if (CGM.getTriple().isAMDGCN())
1878         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1879       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1880       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1881                             FunctionArgList(), Loc, Loc);
1882       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1883       llvm::Constant *AddrInAS0 = Addr;
1884       if (Addr->getAddressSpace() != 0)
1885         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1886             Addr, llvm::PointerType::getWithSamePointeeType(
1887                       cast<llvm::PointerType>(Addr->getType()), 0));
1888       CtorCGF.EmitAnyExprToMem(Init,
1889                                Address(AddrInAS0, Addr->getValueType(),
1890                                        CGM.getContext().getDeclAlign(VD)),
1891                                Init->getType().getQualifiers(),
1892                                /*IsInitializer=*/true);
1893       CtorCGF.FinishFunction();
1894       Ctor = Fn;
1895       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1896     } else {
1897       Ctor = new llvm::GlobalVariable(
1898           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1899           llvm::GlobalValue::PrivateLinkage,
1900           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1901       ID = Ctor;
1902     }
1903 
1904     // Register the information for the entry associated with the constructor.
1905     Out.clear();
1906     auto CtorEntryInfo = EntryInfo;
1907     CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1908     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1909         CtorEntryInfo, Ctor, ID,
1910         llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1911   }
1912   if (VD->getType().isDestructedType() != QualType::DK_none) {
1913     llvm::Constant *Dtor;
1914     llvm::Constant *ID;
1915     if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
1918       CodeGenFunction DtorCGF(CGM);
1919 
1920       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1921       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1922       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1923           FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1924           llvm::GlobalValue::WeakODRLinkage);
1925       Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1926       if (CGM.getTriple().isAMDGCN())
1927         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1928       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1929       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1930                             FunctionArgList(), Loc, Loc);
1931       // Create a scope with an artificial location for the body of this
1932       // function.
1933       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1934       llvm::Constant *AddrInAS0 = Addr;
1935       if (Addr->getAddressSpace() != 0)
1936         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1937             Addr, llvm::PointerType::getWithSamePointeeType(
1938                       cast<llvm::PointerType>(Addr->getType()), 0));
1939       DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1940                                   CGM.getContext().getDeclAlign(VD)),
1941                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1942                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1943       DtorCGF.FinishFunction();
1944       Dtor = Fn;
1945       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1946     } else {
1947       Dtor = new llvm::GlobalVariable(
1948           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1949           llvm::GlobalValue::PrivateLinkage,
1950           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1951       ID = Dtor;
1952     }
1953     // Register the information for the entry associated with the destructor.
1954     Out.clear();
1955     auto DtorEntryInfo = EntryInfo;
1956     DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1957     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1958         DtorEntryInfo, Dtor, ID,
1959         llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1960   }
1961   return CGM.getLangOpts().OpenMPIsDevice;
1962 }
1963 
1964 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1965                                                           QualType VarType,
1966                                                           StringRef Name) {
1967   std::string Suffix = getName({"artificial", ""});
1968   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1969   llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1970       VarLVType, Twine(Name).concat(Suffix).str());
1971   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1972       CGM.getTarget().isTLSSupported()) {
1973     GAddr->setThreadLocal(/*Val=*/true);
1974     return Address(GAddr, GAddr->getValueType(),
1975                    CGM.getContext().getTypeAlignInChars(VarType));
1976   }
1977   std::string CacheSuffix = getName({"cache", ""});
1978   llvm::Value *Args[] = {
1979       emitUpdateLocation(CGF, SourceLocation()),
1980       getThreadID(CGF, SourceLocation()),
1981       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1982       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1983                                 /*isSigned=*/false),
1984       OMPBuilder.getOrCreateInternalVariable(
1985           CGM.VoidPtrPtrTy,
1986           Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1987   return Address(
1988       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1989           CGF.EmitRuntimeCall(
1990               OMPBuilder.getOrCreateRuntimeFunction(
1991                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1992               Args),
1993           VarLVType->getPointerTo(/*AddrSpace=*/0)),
1994       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1995 }
1996 
1997 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1998                                    const RegionCodeGenTy &ThenGen,
1999                                    const RegionCodeGenTy &ElseGen) {
2000   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2001 
2002   // If the condition constant folds and can be elided, try to avoid emitting
2003   // the condition and the dead arm of the if/else.
2004   bool CondConstant;
2005   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2006     if (CondConstant)
2007       ThenGen(CGF);
2008     else
2009       ElseGen(CGF);
2010     return;
2011   }
2012 
2013   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2014   // emit the conditional branch.
2015   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2016   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2017   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2018   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2019 
2020   // Emit the 'then' code.
2021   CGF.EmitBlock(ThenBlock);
2022   ThenGen(CGF);
2023   CGF.EmitBranch(ContBlock);
2024   // Emit the 'else' code if present.
2025   // There is no need to emit line number for unconditional branch.
2026   (void)ApplyDebugLocation::CreateEmpty(CGF);
2027   CGF.EmitBlock(ElseBlock);
2028   ElseGen(CGF);
2029   // There is no need to emit line number for unconditional branch.
2030   (void)ApplyDebugLocation::CreateEmpty(CGF);
2031   CGF.EmitBranch(ContBlock);
2032   // Emit the continuation block for code after the if.
2033   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2034 }
2035 
2036 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2037                                        llvm::Function *OutlinedFn,
2038                                        ArrayRef<llvm::Value *> CapturedVars,
2039                                        const Expr *IfCond,
2040                                        llvm::Value *NumThreads) {
2041   if (!CGF.HaveInsertPoint())
2042     return;
2043   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2044   auto &M = CGM.getModule();
2045   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2046                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2047     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2048     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2049     llvm::Value *Args[] = {
2050         RTLoc,
2051         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2052         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2053     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2054     RealArgs.append(std::begin(Args), std::end(Args));
2055     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2056 
2057     llvm::FunctionCallee RTLFn =
2058         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2059     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2060   };
2061   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2062                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2063     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2064     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2065     // Build calls:
2066     // __kmpc_serialized_parallel(&Loc, GTid);
2067     llvm::Value *Args[] = {RTLoc, ThreadID};
2068     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2069                             M, OMPRTL___kmpc_serialized_parallel),
2070                         Args);
2071 
2072     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2073     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2074     Address ZeroAddrBound =
2075         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2076                                          /*Name=*/".bound.zero.addr");
2077     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2078     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2079     // ThreadId for serialized parallels is 0.
2080     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2081     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2082     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2083 
    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    //       the handling there. Much cleaner code.
2090     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2091     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2092     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2093 
2094     // __kmpc_end_serialized_parallel(&Loc, GTid);
2095     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2096     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2097                             M, OMPRTL___kmpc_end_serialized_parallel),
2098                         EndArgs);
2099   };
2100   if (IfCond) {
2101     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2102   } else {
2103     RegionCodeGenTy ThenRCG(ThenGen);
2104     ThenRCG(CGF);
2105   }
2106 }
2107 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temporary.
2114 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2115                                              SourceLocation Loc) {
2116   if (auto *OMPRegionInfo =
2117           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2118     if (OMPRegionInfo->getThreadIDVariable())
2119       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2120 
2121   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2122   QualType Int32Ty =
2123       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2124   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2125   CGF.EmitStoreOfScalar(ThreadID,
2126                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2127 
2128   return ThreadIDTemp;
2129 }
2130 
2131 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2132   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2133   std::string Name = getName({Prefix, "var"});
2134   return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2135 }
2136 
2137 namespace {
/// Common pre(post)-action for different OpenMP constructs: emits an "enter"
/// runtime call on entry and an "exit" runtime call on exit; if Conditional
/// is set, the region body only runs when the "enter" call returns non-zero.
2139 class CommonActionTy final : public PrePostActionTy {
2140   llvm::FunctionCallee EnterCallee;
2141   ArrayRef<llvm::Value *> EnterArgs;
2142   llvm::FunctionCallee ExitCallee;
2143   ArrayRef<llvm::Value *> ExitArgs;
2144   bool Conditional;
2145   llvm::BasicBlock *ContBlock = nullptr;
2146 
2147 public:
2148   CommonActionTy(llvm::FunctionCallee EnterCallee,
2149                  ArrayRef<llvm::Value *> EnterArgs,
2150                  llvm::FunctionCallee ExitCallee,
2151                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2152       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2153         ExitArgs(ExitArgs), Conditional(Conditional) {}
2154   void Enter(CodeGenFunction &CGF) override {
2155     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2156     if (Conditional) {
2157       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2158       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2159       ContBlock = CGF.createBasicBlock("omp_if.end");
2160       // Generate the branch (If-stmt)
2161       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2162       CGF.EmitBlock(ThenBlock);
2163     }
2164   }
2165   void Done(CodeGenFunction &CGF) {
    // Emit the remaining blocks and branches for the conditional form.
2167     CGF.EmitBranch(ContBlock);
2168     CGF.EmitBlock(ContBlock, true);
2169   }
2170   void Exit(CodeGenFunction &CGF) override {
2171     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2172   }
2173 };
2174 } // anonymous namespace
2175 
2176 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2177                                          StringRef CriticalName,
2178                                          const RegionCodeGenTy &CriticalOpGen,
2179                                          SourceLocation Loc, const Expr *Hint) {
2180   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2181   // CriticalOpGen();
2182   // __kmpc_end_critical(ident_t *, gtid, Lock);
2183   // Prepare arguments and build a call to __kmpc_critical
2184   if (!CGF.HaveInsertPoint())
2185     return;
2186   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2187                          getCriticalRegionLock(CriticalName)};
2188   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2189                                                 std::end(Args));
2190   if (Hint) {
2191     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2192         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2193   }
2194   CommonActionTy Action(
2195       OMPBuilder.getOrCreateRuntimeFunction(
2196           CGM.getModule(),
2197           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2198       EnterArgs,
2199       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2200                                             OMPRTL___kmpc_end_critical),
2201       Args);
2202   CriticalOpGen.setAction(Action);
2203   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2204 }
2205 
2206 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2207                                        const RegionCodeGenTy &MasterOpGen,
2208                                        SourceLocation Loc) {
2209   if (!CGF.HaveInsertPoint())
2210     return;
2211   // if(__kmpc_master(ident_t *, gtid)) {
2212   //   MasterOpGen();
2213   //   __kmpc_end_master(ident_t *, gtid);
2214   // }
2215   // Prepare arguments and build a call to __kmpc_master
2216   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2217   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2218                             CGM.getModule(), OMPRTL___kmpc_master),
2219                         Args,
2220                         OMPBuilder.getOrCreateRuntimeFunction(
2221                             CGM.getModule(), OMPRTL___kmpc_end_master),
2222                         Args,
2223                         /*Conditional=*/true);
2224   MasterOpGen.setAction(Action);
2225   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2226   Action.Done(CGF);
2227 }
2228 
2229 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2230                                        const RegionCodeGenTy &MaskedOpGen,
2231                                        SourceLocation Loc, const Expr *Filter) {
2232   if (!CGF.HaveInsertPoint())
2233     return;
2234   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2235   //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
2237   // }
2238   // Prepare arguments and build a call to __kmpc_masked
2239   llvm::Value *FilterVal = Filter
2240                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2241                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2242   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2243                          FilterVal};
2244   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2245                             getThreadID(CGF, Loc)};
2246   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2247                             CGM.getModule(), OMPRTL___kmpc_masked),
2248                         Args,
2249                         OMPBuilder.getOrCreateRuntimeFunction(
2250                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2251                         ArgsEnd,
2252                         /*Conditional=*/true);
2253   MaskedOpGen.setAction(Action);
2254   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2255   Action.Done(CGF);
2256 }
2257 
2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2259                                         SourceLocation Loc) {
2260   if (!CGF.HaveInsertPoint())
2261     return;
2262   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2263     OMPBuilder.createTaskyield(CGF.Builder);
2264   } else {
2265     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2266     llvm::Value *Args[] = {
2267         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2268         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2269     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2271                         Args);
2272   }
2273 
2274   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2275     Region->emitUntiedSwitch(CGF);
2276 }
2277 
2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2279                                           const RegionCodeGenTy &TaskgroupOpGen,
2280                                           SourceLocation Loc) {
2281   if (!CGF.HaveInsertPoint())
2282     return;
2283   // __kmpc_taskgroup(ident_t *, gtid);
2284   // TaskgroupOpGen();
2285   // __kmpc_end_taskgroup(ident_t *, gtid);
2286   // Prepare arguments and build a call to __kmpc_taskgroup
2287   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2290                         Args,
2291                         OMPBuilder.getOrCreateRuntimeFunction(
2292                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2293                         Args);
2294   TaskgroupOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2296 }
2297 
2298 /// Given an array of pointers to variables, project the address of a
2299 /// given variable.
2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2301                                       unsigned Index, const VarDecl *Var) {
2302   // Pull out the pointer to the variable.
2303   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2304   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2305 
2306   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2307   return Address(
2308       CGF.Builder.CreateBitCast(
2309           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2310       ElemTy, CGF.getContext().getDeclAlign(Var));
2311 }
2312 
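/// Emits the helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* holding the addresses of each
/// thread's copyprivate variables; the helper assigns element-wise from the
/// RHS (source) array to the LHS (destination) array.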
2313 static llvm::Value *emitCopyprivateCopyFunction(
2314     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2315     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2316     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2317     SourceLocation Loc) {
2318   ASTContext &C = CGM.getContext();
2319   // void copy_func(void *LHSArg, void *RHSArg);
2320   FunctionArgList Args;
2321   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2322                            ImplicitParamDecl::Other);
2323   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2324                            ImplicitParamDecl::Other);
2325   Args.push_back(&LHSArg);
2326   Args.push_back(&RHSArg);
2327   const auto &CGFI =
2328       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2329   std::string Name =
2330       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2331   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2332                                     llvm::GlobalValue::InternalLinkage, Name,
2333                                     &CGM.getModule());
2334   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2335   Fn->setDoesNotRecurse();
2336   CodeGenFunction CGF(CGM);
2337   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2338   // Dest = (void*[n])(LHSArg);
2339   // Src = (void*[n])(RHSArg);
2340   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2341                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2342                   ArgsElemType->getPointerTo()),
2343               ArgsElemType, CGF.getPointerAlign());
2344   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2345                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2346                   ArgsElemType->getPointerTo()),
2347               ArgsElemType, CGF.getPointerAlign());
2348   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2349   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2350   // ...
2351   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2352   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2353     const auto *DestVar =
2354         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2355     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2356 
2357     const auto *SrcVar =
2358         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2359     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2360 
2361     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2362     QualType Type = VD->getType();
2363     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2364   }
2365   CGF.FinishFunction();
2366   return Fn;
2367 }
2368 
2369 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2370                                        const RegionCodeGenTy &SingleOpGen,
2371                                        SourceLocation Loc,
2372                                        ArrayRef<const Expr *> CopyprivateVars,
2373                                        ArrayRef<const Expr *> SrcExprs,
2374                                        ArrayRef<const Expr *> DstExprs,
2375                                        ArrayRef<const Expr *> AssignmentOps) {
2376   if (!CGF.HaveInsertPoint())
2377     return;
2378   assert(CopyprivateVars.size() == SrcExprs.size() &&
2379          CopyprivateVars.size() == DstExprs.size() &&
2380          CopyprivateVars.size() == AssignmentOps.size());
2381   ASTContext &C = CGM.getContext();
2382   // int32 did_it = 0;
2383   // if(__kmpc_single(ident_t *, gtid)) {
2384   //   SingleOpGen();
2385   //   __kmpc_end_single(ident_t *, gtid);
2386   //   did_it = 1;
2387   // }
2388   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2389   // <copy_func>, did_it);
2390 
2391   Address DidIt = Address::invalid();
2392   if (!CopyprivateVars.empty()) {
2393     // int32 did_it = 0;
2394     QualType KmpInt32Ty =
2395         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2396     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2397     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2398   }
2399   // Prepare arguments and build a call to __kmpc_single
2400   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2401   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2402                             CGM.getModule(), OMPRTL___kmpc_single),
2403                         Args,
2404                         OMPBuilder.getOrCreateRuntimeFunction(
2405                             CGM.getModule(), OMPRTL___kmpc_end_single),
2406                         Args,
2407                         /*Conditional=*/true);
2408   SingleOpGen.setAction(Action);
2409   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2410   if (DidIt.isValid()) {
2411     // did_it = 1;
2412     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2413   }
2414   Action.Done(CGF);
2415   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2416   // <copy_func>, did_it);
2417   if (DidIt.isValid()) {
2418     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2419     QualType CopyprivateArrayTy = C.getConstantArrayType(
2420         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2421         /*IndexTypeQuals=*/0);
2422     // Create a list of all private variables for copyprivate.
2423     Address CopyprivateList =
2424         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2425     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2426       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2427       CGF.Builder.CreateStore(
2428           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2429               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2430               CGF.VoidPtrTy),
2431           Elem);
2432     }
2433     // Build function that copies private values from single region to all other
2434     // threads in the corresponding parallel region.
2435     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2436         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2437         SrcExprs, DstExprs, AssignmentOps, Loc);
2438     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2439     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2440         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2441     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2442     llvm::Value *Args[] = {
2443         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2444         getThreadID(CGF, Loc),        // i32 <gtid>
2445         BufSize,                      // size_t <buf_size>
2446         CL.getPointer(),              // void *<copyprivate list>
2447         CpyFn,                        // void (*) (void *, void *) <copy_func>
2448         DidItVal                      // i32 did_it
2449     };
2450     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2451                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2452                         Args);
2453   }
2454 }
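
// E.g. for 'copyprivate(a)' the list built above is conceptually
//   void *.omp.copyprivate.cpr_list[1] = { &a };
// with <buf_size> = sizeof(void *[1]) (an illustrative sketch; the actual
// size comes from getTypeSize above).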
2455 
2456 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2457                                         const RegionCodeGenTy &OrderedOpGen,
2458                                         SourceLocation Loc, bool IsThreads) {
2459   if (!CGF.HaveInsertPoint())
2460     return;
2461   // __kmpc_ordered(ident_t *, gtid);
2462   // OrderedOpGen();
2463   // __kmpc_end_ordered(ident_t *, gtid);
2464   // Prepare arguments and build a call to __kmpc_ordered
2465   if (IsThreads) {
2466     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2467     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2468                               CGM.getModule(), OMPRTL___kmpc_ordered),
2469                           Args,
2470                           OMPBuilder.getOrCreateRuntimeFunction(
2471                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2472                           Args);
2473     OrderedOpGen.setAction(Action);
2474     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2475     return;
2476   }
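  // Without the 'threads' modifier (e.g. 'ordered simd'), no runtime calls
  // are needed; the region is simply emitted inline.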
2477   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2478 }
2479 
2480 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2481   unsigned Flags;
2482   if (Kind == OMPD_for)
2483     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2484   else if (Kind == OMPD_sections)
2485     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2486   else if (Kind == OMPD_single)
2487     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2488   else if (Kind == OMPD_barrier)
2489     Flags = OMP_IDENT_BARRIER_EXPL;
2490   else
2491     Flags = OMP_IDENT_BARRIER_IMPL;
2492   return Flags;
2493 }
2494 
2495 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2496     CodeGenFunction &CGF, const OMPLoopDirective &S,
2497     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In that
  // case choose the equivalent of schedule(static, 1).
2500   if (llvm::any_of(
2501           S.getClausesOfKind<OMPOrderedClause>(),
2502           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2503     ScheduleKind = OMPC_SCHEDULE_static;
2504     // Chunk size is 1 in this case.
2505     llvm::APInt ChunkSize(32, 1);
2506     ChunkExpr = IntegerLiteral::Create(
2507         CGF.getContext(), ChunkSize,
2508         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2509         SourceLocation());
2510   }
2511 }
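
// E.g. '#pragma omp for ordered(2)' declares a doacross loop nest, so the
// code above picks the schedule as if 'schedule(static, 1)' had been written.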
2512 
2513 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2514                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2515                                       bool ForceSimpleCall) {
2516   // Check if we should use the OMPBuilder
2517   auto *OMPRegionInfo =
2518       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2519   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2520     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2521         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2522     return;
2523   }
2524 
2525   if (!CGF.HaveInsertPoint())
2526     return;
  // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id).
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
2532   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2533                          getThreadID(CGF, Loc)};
2534   if (OMPRegionInfo) {
2535     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2536       llvm::Value *Result = CGF.EmitRuntimeCall(
2537           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2538                                                 OMPRTL___kmpc_cancel_barrier),
2539           Args);
2540       if (EmitChecks) {
2541         // if (__kmpc_cancel_barrier()) {
2542         //   exit from construct;
2543         // }
2544         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2545         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2546         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2547         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2548         CGF.EmitBlock(ExitBB);
2549         //   exit from construct;
2550         CodeGenFunction::JumpDest CancelDestination =
2551             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2552         CGF.EmitBranchThroughCleanup(CancelDestination);
2553         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2554       }
2555       return;
2556     }
2557   }
2558   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2559                           CGM.getModule(), OMPRTL___kmpc_barrier),
2560                       Args);
2561 }
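
// Illustrative sketch of the two lowerings: a plain barrier becomes
//   __kmpc_barrier(&loc, tid);
// while a barrier in a cancellable region with checks enabled becomes
//   if (__kmpc_cancel_barrier(&loc, tid))
//     <branch to the cancellation exit of the enclosing construct>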
2562 
2563 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2564                                     Expr *ME, bool IsFatal) {
2565   llvm::Value *MVL =
2566       ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2567          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2568   // Build call void __kmpc_error(ident_t *loc, int severity, const char
2569   // *message)
2570   llvm::Value *Args[] = {
2571       emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2572       llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2573       CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2574   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2575                           CGM.getModule(), OMPRTL___kmpc_error),
2576                       Args);
2577 }
2578 
2579 /// Map the OpenMP loop schedule to the runtime enumeration.
2580 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2581                                           bool Chunked, bool Ordered) {
2582   switch (ScheduleKind) {
2583   case OMPC_SCHEDULE_static:
2584     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2585                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2586   case OMPC_SCHEDULE_dynamic:
2587     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2588   case OMPC_SCHEDULE_guided:
2589     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2590   case OMPC_SCHEDULE_runtime:
2591     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2592   case OMPC_SCHEDULE_auto:
2593     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2594   case OMPC_SCHEDULE_unknown:
2595     assert(!Chunked && "chunk was specified but schedule kind not known");
2596     return Ordered ? OMP_ord_static : OMP_sch_static;
2597   }
2598   llvm_unreachable("Unexpected runtime schedule");
2599 }
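
// For example (unordered): schedule(static) -> OMP_sch_static,
// schedule(static, N) -> OMP_sch_static_chunked, and schedule(dynamic[, N])
// -> OMP_sch_dynamic_chunked; with the 'ordered' clause the corresponding
// OMP_ord_* values are used instead.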
2600 
2601 /// Map the OpenMP distribute schedule to the runtime enumeration.
2602 static OpenMPSchedType
2603 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only 'static' is allowed for dist_schedule.
2605   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2606 }
2607 
2608 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2609                                          bool Chunked) const {
2610   OpenMPSchedType Schedule =
2611       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2612   return Schedule == OMP_sch_static;
2613 }
2614 
2615 bool CGOpenMPRuntime::isStaticNonchunked(
2616     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2617   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2618   return Schedule == OMP_dist_sch_static;
2619 }
2620 
2621 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2622                                       bool Chunked) const {
2623   OpenMPSchedType Schedule =
2624       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2625   return Schedule == OMP_sch_static_chunked;
2626 }
2627 
2628 bool CGOpenMPRuntime::isStaticChunked(
2629     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2630   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2631   return Schedule == OMP_dist_sch_static_chunked;
2632 }
2633 
2634 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2635   OpenMPSchedType Schedule =
2636       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2637   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2638   return Schedule != OMP_sch_static;
2639 }
2640 
2641 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2642                                   OpenMPScheduleClauseModifier M1,
2643                                   OpenMPScheduleClauseModifier M2) {
2644   int Modifier = 0;
2645   switch (M1) {
2646   case OMPC_SCHEDULE_MODIFIER_monotonic:
2647     Modifier = OMP_sch_modifier_monotonic;
2648     break;
2649   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2650     Modifier = OMP_sch_modifier_nonmonotonic;
2651     break;
2652   case OMPC_SCHEDULE_MODIFIER_simd:
2653     if (Schedule == OMP_sch_static_chunked)
2654       Schedule = OMP_sch_static_balanced_chunked;
2655     break;
2656   case OMPC_SCHEDULE_MODIFIER_last:
2657   case OMPC_SCHEDULE_MODIFIER_unknown:
2658     break;
2659   }
2660   switch (M2) {
2661   case OMPC_SCHEDULE_MODIFIER_monotonic:
2662     Modifier = OMP_sch_modifier_monotonic;
2663     break;
2664   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2665     Modifier = OMP_sch_modifier_nonmonotonic;
2666     break;
2667   case OMPC_SCHEDULE_MODIFIER_simd:
2668     if (Schedule == OMP_sch_static_chunked)
2669       Schedule = OMP_sch_static_balanced_chunked;
2670     break;
2671   case OMPC_SCHEDULE_MODIFIER_last:
2672   case OMPC_SCHEDULE_MODIFIER_unknown:
2673     break;
2674   }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2676   // If the static schedule kind is specified or if the ordered clause is
2677   // specified, and if the nonmonotonic modifier is not specified, the effect is
2678   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2679   // modifier is specified, the effect is as if the nonmonotonic modifier is
2680   // specified.
2681   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2682     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2683           Schedule == OMP_sch_static_balanced_chunked ||
2684           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2685           Schedule == OMP_dist_sch_static_chunked ||
2686           Schedule == OMP_dist_sch_static))
2687       Modifier = OMP_sch_modifier_nonmonotonic;
2688   }
2689   return Schedule | Modifier;
2690 }
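
// E.g. under OpenMP >= 5.0, 'schedule(dynamic, 4)' with no explicit modifier
// yields OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while
// 'schedule(monotonic: dynamic, 4)' yields
// OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic (a sketch of the
// bitwise OR performed above).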
2691 
2692 void CGOpenMPRuntime::emitForDispatchInit(
2693     CodeGenFunction &CGF, SourceLocation Loc,
2694     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2695     bool Ordered, const DispatchRTInput &DispatchValues) {
2696   if (!CGF.HaveInsertPoint())
2697     return;
2698   OpenMPSchedType Schedule = getRuntimeSchedule(
2699       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2700   assert(Ordered ||
2701          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2702           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2703           Schedule != OMP_sch_static_balanced_chunked));
2704   // Call __kmpc_dispatch_init(
2705   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2706   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2707   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2708 
  // If the Chunk was not specified in the clause, use the default value 1.
2710   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2711                                             : CGF.Builder.getIntN(IVSize, 1);
2712   llvm::Value *Args[] = {
2713       emitUpdateLocation(CGF, Loc),
2714       getThreadID(CGF, Loc),
2715       CGF.Builder.getInt32(addMonoNonMonoModifier(
2716           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2717       DispatchValues.LB,                                     // Lower
2718       DispatchValues.UB,                                     // Upper
2719       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2720       Chunk                                                  // Chunk
2721   };
2722   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2723 }
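
// The init call above is paired by the caller with a dispatch loop; roughly
// (an illustrative sketch for a 32-bit signed IV):
//   __kmpc_dispatch_init_4(&loc, tid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; ++i)
//       <loop body>;
//   }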
2724 
2725 static void emitForStaticInitCall(
2726     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2727     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2728     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2729     const CGOpenMPRuntime::StaticRTInput &Values) {
2730   if (!CGF.HaveInsertPoint())
2731     return;
2732 
2733   assert(!Values.Ordered);
2734   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2735          Schedule == OMP_sch_static_balanced_chunked ||
2736          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2737          Schedule == OMP_dist_sch_static ||
2738          Schedule == OMP_dist_sch_static_chunked);
2739 
2740   // Call __kmpc_for_static_init(
2741   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2742   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2743   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2744   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2745   llvm::Value *Chunk = Values.Chunk;
2746   if (Chunk == nullptr) {
2747     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2748             Schedule == OMP_dist_sch_static) &&
2749            "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use the default value 1.
2751     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2752   } else {
2753     assert((Schedule == OMP_sch_static_chunked ||
2754             Schedule == OMP_sch_static_balanced_chunked ||
2755             Schedule == OMP_ord_static_chunked ||
2756             Schedule == OMP_dist_sch_static_chunked) &&
2757            "expected static chunked schedule");
2758   }
2759   llvm::Value *Args[] = {
2760       UpdateLocation,
2761       ThreadId,
2762       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2763                                                   M2)), // Schedule type
2764       Values.IL.getPointer(),                           // &isLastIter
2765       Values.LB.getPointer(),                           // &LB
2766       Values.UB.getPointer(),                           // &UB
2767       Values.ST.getPointer(),                           // &Stride
2768       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2769       Chunk                                             // Chunk
2770   };
2771   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2772 }
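
// Callers pair this init with the loop body and a matching fini call; e.g.
// for 'schedule(static)' the emitted sequence is roughly (sketch):
//   __kmpc_for_static_init_4(&loc, tid, schedtype, &last, &lb, &ub, &st,
//                            /*incr=*/1, /*chunk=*/1);
//   for (i = lb; i <= ub; ++i)
//     <loop body>;
//   __kmpc_for_static_fini(&loc, tid);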
2773 
2774 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2775                                         SourceLocation Loc,
2776                                         OpenMPDirectiveKind DKind,
2777                                         const OpenMPScheduleTy &ScheduleKind,
2778                                         const StaticRTInput &Values) {
2779   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2780       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2781   assert(isOpenMPWorksharingDirective(DKind) &&
2782          "Expected loop-based or sections-based directive.");
2783   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2784                                              isOpenMPLoopDirective(DKind)
2785                                                  ? OMP_IDENT_WORK_LOOP
2786                                                  : OMP_IDENT_WORK_SECTIONS);
2787   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2788   llvm::FunctionCallee StaticInitFunction =
2789       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2790   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2791   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2792                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2793 }
2794 
2795 void CGOpenMPRuntime::emitDistributeStaticInit(
2796     CodeGenFunction &CGF, SourceLocation Loc,
2797     OpenMPDistScheduleClauseKind SchedKind,
2798     const CGOpenMPRuntime::StaticRTInput &Values) {
2799   OpenMPSchedType ScheduleNum =
2800       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2801   llvm::Value *UpdatedLocation =
2802       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2803   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2804   llvm::FunctionCallee StaticInitFunction;
2805   bool isGPUDistribute =
2806       CGM.getLangOpts().OpenMPIsDevice &&
2807       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2808   StaticInitFunction = createForStaticInitFunction(
2809       Values.IVSize, Values.IVSigned, isGPUDistribute);
2810 
2811   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2812                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2813                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2814 }
2815 
2816 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2817                                           SourceLocation Loc,
2818                                           OpenMPDirectiveKind DKind) {
2819   if (!CGF.HaveInsertPoint())
2820     return;
2821   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2822   llvm::Value *Args[] = {
2823       emitUpdateLocation(CGF, Loc,
2824                          isOpenMPDistributeDirective(DKind)
2825                              ? OMP_IDENT_WORK_DISTRIBUTE
2826                              : isOpenMPLoopDirective(DKind)
2827                                    ? OMP_IDENT_WORK_LOOP
2828                                    : OMP_IDENT_WORK_SECTIONS),
2829       getThreadID(CGF, Loc)};
2830   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2831   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2832       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2833     CGF.EmitRuntimeCall(
2834         OMPBuilder.getOrCreateRuntimeFunction(
2835             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2836         Args);
2837   else
2838     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2839                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2840                         Args);
2841 }
2842 
2843 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2844                                                  SourceLocation Loc,
2845                                                  unsigned IVSize,
2846                                                  bool IVSigned) {
2847   if (!CGF.HaveInsertPoint())
2848     return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2850   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2851   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2852 }
2853 
2854 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2855                                           SourceLocation Loc, unsigned IVSize,
2856                                           bool IVSigned, Address IL,
2857                                           Address LB, Address UB,
2858                                           Address ST) {
2859   // Call __kmpc_dispatch_next(
2860   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2861   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2862   //          kmp_int[32|64] *p_stride);
2863   llvm::Value *Args[] = {
2864       emitUpdateLocation(CGF, Loc),
2865       getThreadID(CGF, Loc),
2866       IL.getPointer(), // &isLastIter
2867       LB.getPointer(), // &Lower
2868       UB.getPointer(), // &Upper
2869       ST.getPointer()  // &Stride
2870   };
2871   llvm::Value *Call =
2872       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2873   return CGF.EmitScalarConversion(
2874       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2875       CGF.getContext().BoolTy, Loc);
2876 }
2877 
2878 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2879                                            llvm::Value *NumThreads,
2880                                            SourceLocation Loc) {
2881   if (!CGF.HaveInsertPoint())
2882     return;
2883   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2884   llvm::Value *Args[] = {
2885       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2886       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2887   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2889                       Args);
2890 }
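
// E.g. '#pragma omp parallel num_threads(N)' emits, ahead of the fork:
//   __kmpc_push_num_threads(&loc, gtid, N);
// (a sketch; the subsequent __kmpc_fork_call is emitted elsewhere).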
2891 
2892 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2893                                          ProcBindKind ProcBind,
2894                                          SourceLocation Loc) {
2895   if (!CGF.HaveInsertPoint())
2896     return;
2897   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2898   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2899   llvm::Value *Args[] = {
2900       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2901       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2902   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2903                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2904                       Args);
2905 }
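
// E.g. 'proc_bind(close)' becomes (sketch)
//   __kmpc_push_proc_bind(&loc, gtid, /*proc_bind=*/OMP_PROC_BIND_close);
// ahead of the fork for the enclosing parallel region.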
2906 
2907 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2908                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2909   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2910     OMPBuilder.createFlush(CGF.Builder);
2911   } else {
2912     if (!CGF.HaveInsertPoint())
2913       return;
2914     // Build call void __kmpc_flush(ident_t *loc)
2915     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2916                             CGM.getModule(), OMPRTL___kmpc_flush),
2917                         emitUpdateLocation(CGF, Loc));
2918   }
2919 }
2920 
2921 namespace {
2922 /// Indexes of fields for type kmp_task_t.
2923 enum KmpTaskTFields {
2924   /// List of shared variables.
2925   KmpTaskTShareds,
2926   /// Task routine.
2927   KmpTaskTRoutine,
2928   /// Partition id for the untied tasks.
2929   KmpTaskTPartId,
2930   /// Function with call of destructors for private variables.
2931   Data1,
2932   /// Task priority.
2933   Data2,
2934   /// (Taskloops only) Lower bound.
2935   KmpTaskTLowerBound,
2936   /// (Taskloops only) Upper bound.
2937   KmpTaskTUpperBound,
2938   /// (Taskloops only) Stride.
2939   KmpTaskTStride,
2940   /// (Taskloops only) Is last iteration flag.
2941   KmpTaskTLastIter,
2942   /// (Taskloops only) Reduction data.
2943   KmpTaskTReductions,
2944 };
2945 } // anonymous namespace
2946 
2947 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2948   // If we are in simd mode or there are no entries, we don't need to do
2949   // anything.
2950   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
2951     return;
2952 
2953   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2954       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2955              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2956     SourceLocation Loc;
2957     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2958       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2959                 E = CGM.getContext().getSourceManager().fileinfo_end();
2960            I != E; ++I) {
2961         if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
2962             I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
2963           Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2964               I->getFirst(), EntryInfo.Line, 1);
2965           break;
2966         }
2967       }
2968     }
2969     switch (Kind) {
2970     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2971       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2972           DiagnosticsEngine::Error, "Offloading entry for target region in "
2973                                     "%0 is incorrect: either the "
2974                                     "address or the ID is invalid.");
2975       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2976     } break;
2977     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2978       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2979           DiagnosticsEngine::Error, "Offloading entry for declare target "
2980                                     "variable %0 is incorrect: the "
2981                                     "address is invalid.");
2982       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2983     } break;
2984     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2985       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2986           DiagnosticsEngine::Error,
2987           "Offloading entry for declare target variable is incorrect: the "
2988           "address is invalid.");
2989       CGM.getDiags().Report(DiagID);
2990     } break;
2991     }
2992   };
2993 
2994   OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager,
2995                                                  ErrorReportFn);
2996 }
2997 
2998 /// Loads all the offload entries information from the host IR
2999 /// metadata.
3000 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().
3003 
3004   if (!CGM.getLangOpts().OpenMPIsDevice)
3005     return;
3006 
3007   if (CGM.getLangOpts().OMPHostIRFile.empty())
3008     return;
3009 
3010   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3011   if (auto EC = Buf.getError()) {
3012     CGM.getDiags().Report(diag::err_cannot_open_file)
3013         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3014     return;
3015   }
3016 
3017   llvm::LLVMContext C;
3018   auto ME = expectedToErrorOrAndEmitErrors(
3019       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3020 
3021   if (auto EC = ME.getError()) {
3022     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3023         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3024     CGM.getDiags().Report(DiagID)
3025         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3026     return;
3027   }
3028 
3029   OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager);
3030 }
3031 
3032 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3033   if (!KmpRoutineEntryPtrTy) {
    // Build the typedef: kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
3035     ASTContext &C = CGM.getContext();
3036     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3037     FunctionProtoType::ExtProtoInfo EPI;
3038     KmpRoutineEntryPtrQTy = C.getPointerType(
3039         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3040     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3041   }
3042 }
3043 
3044 namespace {
3045 struct PrivateHelpersTy {
3046   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3047                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3048       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3049         PrivateElemInit(PrivateElemInit) {}
3050   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3051   const Expr *OriginalRef = nullptr;
3052   const VarDecl *Original = nullptr;
3053   const VarDecl *PrivateCopy = nullptr;
3054   const VarDecl *PrivateElemInit = nullptr;
3055   bool isLocalPrivate() const {
3056     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3057   }
3058 };
3059 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3060 } // anonymous namespace
3061 
3062 static bool isAllocatableDecl(const VarDecl *VD) {
3063   const VarDecl *CVD = VD->getCanonicalDecl();
3064   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3065     return false;
3066   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3067   // Use the default allocation.
3068   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3069            !AA->getAllocator());
3070 }
3071 
3072 static RecordDecl *
3073 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3074   if (!Privates.empty()) {
3075     ASTContext &C = CGM.getContext();
3076     // Build struct .kmp_privates_t. {
3077     //         /*  private vars  */
3078     //       };
3079     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3080     RD->startDefinition();
3081     for (const auto &Pair : Privates) {
3082       const VarDecl *VD = Pair.second.Original;
3083       QualType Type = VD->getType().getNonReferenceType();
3084       // If the private variable is a local variable with lvalue ref type,
3085       // allocate the pointer instead of the pointee type.
3086       if (Pair.second.isLocalPrivate()) {
3087         if (VD->getType()->isLValueReferenceType())
3088           Type = C.getPointerType(Type);
3089         if (isAllocatableDecl(VD))
3090           Type = C.getPointerType(Type);
3091       }
3092       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3093       if (VD->hasAttrs()) {
3094         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3095              E(VD->getAttrs().end());
3096              I != E; ++I)
3097           FD->addAttr(*I);
3098       }
3099     }
3100     RD->completeDefinition();
3101     return RD;
3102   }
3103   return nullptr;
3104 }
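
// E.g. for 'firstprivate(a, b)' with 'double a; int b;' the record built
// above is conceptually
//   struct .kmp_privates.t {
//     double a;
//     int b;
//   };
// (an illustrative sketch; fields appear in the order of the Privates array).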
3105 
3106 static RecordDecl *
3107 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3108                          QualType KmpInt32Ty,
3109                          QualType KmpRoutineEntryPointerQTy) {
3110   ASTContext &C = CGM.getContext();
3111   // Build struct kmp_task_t {
3112   //         void *              shareds;
3113   //         kmp_routine_entry_t routine;
3114   //         kmp_int32           part_id;
3115   //         kmp_cmplrdata_t data1;
3116   //         kmp_cmplrdata_t data2;
3117   // For taskloops additional fields:
3118   //         kmp_uint64          lb;
3119   //         kmp_uint64          ub;
3120   //         kmp_int64           st;
3121   //         kmp_int32           liter;
3122   //         void *              reductions;
3123   //       };
3124   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3125   UD->startDefinition();
3126   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3127   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3128   UD->completeDefinition();
3129   QualType KmpCmplrdataTy = C.getRecordType(UD);
3130   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3131   RD->startDefinition();
3132   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3133   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3134   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3135   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3136   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3137   if (isOpenMPTaskLoopDirective(Kind)) {
3138     QualType KmpUInt64Ty =
3139         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3140     QualType KmpInt64Ty =
3141         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3142     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3143     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3144     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3145     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3146     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3147   }
3148   RD->completeDefinition();
3149   return RD;
3150 }
3151 
3152 static RecordDecl *
3153 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3154                                      ArrayRef<PrivateDataTy> Privates) {
3155   ASTContext &C = CGM.getContext();
3156   // Build struct kmp_task_t_with_privates {
3157   //         kmp_task_t task_data;
3158   //         .kmp_privates_t. privates;
3159   //       };
3160   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3161   RD->startDefinition();
3162   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3163   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3164     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3165   RD->completeDefinition();
3166   return RD;
3167 }
3168 
3169 /// Emit a proxy function which accepts kmp_task_t as the second
3170 /// argument.
3171 /// \code
3172 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3173 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3174 ///   For taskloops:
3175 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3176 ///   tt->reductions, tt->shareds);
3177 ///   return 0;
3178 /// }
3179 /// \endcode
3180 static llvm::Function *
3181 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3182                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3183                       QualType KmpTaskTWithPrivatesPtrQTy,
3184                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3185                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3186                       llvm::Value *TaskPrivatesMap) {
3187   ASTContext &C = CGM.getContext();
3188   FunctionArgList Args;
3189   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3190                             ImplicitParamDecl::Other);
3191   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3192                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3193                                 ImplicitParamDecl::Other);
3194   Args.push_back(&GtidArg);
3195   Args.push_back(&TaskTypeArg);
3196   const auto &TaskEntryFnInfo =
3197       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3198   llvm::FunctionType *TaskEntryTy =
3199       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3200   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3201   auto *TaskEntry = llvm::Function::Create(
3202       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3203   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3204   TaskEntry->setDoesNotRecurse();
3205   CodeGenFunction CGF(CGM);
3206   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3207                     Loc, Loc);
3208 
3209   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3210   // tt,
3211   // For taskloops:
3212   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3213   // tt->task_data.shareds);
3214   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3215       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3216   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3217       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3218       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3219   const auto *KmpTaskTWithPrivatesQTyRD =
3220       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3221   LValue Base =
3222       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3223   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3224   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3225   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3226   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3227 
3228   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3229   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3230   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3231       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3232       CGF.ConvertTypeForMem(SharedsPtrTy));
3233 
3234   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3235   llvm::Value *PrivatesParam;
3236   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3237     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3238     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3239         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3240   } else {
3241     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3242   }
3243 
3244   llvm::Value *CommonArgs[] = {
3245       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3246       CGF.Builder
3247           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3248                                                CGF.VoidPtrTy, CGF.Int8Ty)
3249           .getPointer()};
3250   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3251                                           std::end(CommonArgs));
3252   if (isOpenMPTaskLoopDirective(Kind)) {
3253     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3254     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3255     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3256     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3257     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3258     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3259     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3260     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3261     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3262     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3263     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3264     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3265     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3266     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3267     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3268     CallArgs.push_back(LBParam);
3269     CallArgs.push_back(UBParam);
3270     CallArgs.push_back(StParam);
3271     CallArgs.push_back(LIParam);
3272     CallArgs.push_back(RParam);
3273   }
3274   CallArgs.push_back(SharedsParam);
3275 
3276   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3277                                                   CallArgs);
3278   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3279                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3280   CGF.FinishFunction();
3281   return TaskEntry;
3282 }
3283 
3284 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3285                                             SourceLocation Loc,
3286                                             QualType KmpInt32Ty,
3287                                             QualType KmpTaskTWithPrivatesPtrQTy,
3288                                             QualType KmpTaskTWithPrivatesQTy) {
3289   ASTContext &C = CGM.getContext();
3290   FunctionArgList Args;
3291   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3292                             ImplicitParamDecl::Other);
3293   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3294                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3295                                 ImplicitParamDecl::Other);
3296   Args.push_back(&GtidArg);
3297   Args.push_back(&TaskTypeArg);
3298   const auto &DestructorFnInfo =
3299       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3300   llvm::FunctionType *DestructorFnTy =
3301       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3302   std::string Name =
3303       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3304   auto *DestructorFn =
3305       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3306                              Name, &CGM.getModule());
3307   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3308                                     DestructorFnInfo);
3309   DestructorFn->setDoesNotRecurse();
3310   CodeGenFunction CGF(CGM);
3311   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3312                     Args, Loc, Loc);
3313 
3314   LValue Base = CGF.EmitLoadOfPointerLValue(
3315       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3316       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3317   const auto *KmpTaskTWithPrivatesQTyRD =
3318       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3319   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3320   Base = CGF.EmitLValueForField(Base, *FI);
3321   for (const auto *Field :
3322        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3323     if (QualType::DestructionKind DtorKind =
3324             Field->getType().isDestructedType()) {
3325       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3326       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3327     }
3328   }
3329   CGF.FinishFunction();
3330   return DestructorFn;
3331 }
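
// Illustrative sketch of the helper built above: for each non-trivially
// destructible field of the privates record it emits a destructor call,
// conceptually
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     tt->privates.priv1.~T1();
//     ...
//   }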
3332 
3333 /// Emit a privates mapping function for correct handling of private and
3334 /// firstprivate variables.
3335 /// \code
3336 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3337 /// **noalias priv1,...,  <tyn> **noalias privn) {
3338 ///   *priv1 = &.privates.priv1;
3339 ///   ...;
3340 ///   *privn = &.privates.privn;
3341 /// }
3342 /// \endcode
3343 static llvm::Value *
3344 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3345                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3346                                ArrayRef<PrivateDataTy> Privates) {
3347   ASTContext &C = CGM.getContext();
3348   FunctionArgList Args;
3349   ImplicitParamDecl TaskPrivatesArg(
3350       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3351       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3352       ImplicitParamDecl::Other);
3353   Args.push_back(&TaskPrivatesArg);
3354   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3355   unsigned Counter = 1;
3356   for (const Expr *E : Data.PrivateVars) {
3357     Args.push_back(ImplicitParamDecl::Create(
3358         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3359         C.getPointerType(C.getPointerType(E->getType()))
3360             .withConst()
3361             .withRestrict(),
3362         ImplicitParamDecl::Other));
3363     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3364     PrivateVarsPos[VD] = Counter;
3365     ++Counter;
3366   }
3367   for (const Expr *E : Data.FirstprivateVars) {
3368     Args.push_back(ImplicitParamDecl::Create(
3369         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3370         C.getPointerType(C.getPointerType(E->getType()))
3371             .withConst()
3372             .withRestrict(),
3373         ImplicitParamDecl::Other));
3374     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3375     PrivateVarsPos[VD] = Counter;
3376     ++Counter;
3377   }
3378   for (const Expr *E : Data.LastprivateVars) {
3379     Args.push_back(ImplicitParamDecl::Create(
3380         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3381         C.getPointerType(C.getPointerType(E->getType()))
3382             .withConst()
3383             .withRestrict(),
3384         ImplicitParamDecl::Other));
3385     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3386     PrivateVarsPos[VD] = Counter;
3387     ++Counter;
3388   }
3389   for (const VarDecl *VD : Data.PrivateLocals) {
3390     QualType Ty = VD->getType().getNonReferenceType();
3391     if (VD->getType()->isLValueReferenceType())
3392       Ty = C.getPointerType(Ty);
3393     if (isAllocatableDecl(VD))
3394       Ty = C.getPointerType(Ty);
3395     Args.push_back(ImplicitParamDecl::Create(
3396         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3397         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3398         ImplicitParamDecl::Other));
3399     PrivateVarsPos[VD] = Counter;
3400     ++Counter;
3401   }
3402   const auto &TaskPrivatesMapFnInfo =
3403       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3404   llvm::FunctionType *TaskPrivatesMapTy =
3405       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3406   std::string Name =
3407       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3408   auto *TaskPrivatesMap = llvm::Function::Create(
3409       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3410       &CGM.getModule());
3411   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3412                                     TaskPrivatesMapFnInfo);
3413   if (CGM.getLangOpts().Optimize) {
3414     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3415     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3416     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3417   }
3418   CodeGenFunction CGF(CGM);
3419   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3420                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3421 
3422   // *privi = &.privates.privi;
3423   LValue Base = CGF.EmitLoadOfPointerLValue(
3424       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3425       TaskPrivatesArg.getType()->castAs<PointerType>());
3426   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3427   Counter = 0;
3428   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3429     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3430     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3431     LValue RefLVal =
3432         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3433     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3434         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3435     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3436     ++Counter;
3437   }
3438   CGF.FinishFunction();
3439   return TaskPrivatesMap;
3440 }
3441 
3442 /// Emit initialization for private variables in task-based directives.
3443 static void emitPrivatesInit(CodeGenFunction &CGF,
3444                              const OMPExecutableDirective &D,
3445                              Address KmpTaskSharedsPtr, LValue TDBase,
3446                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3447                              QualType SharedsTy, QualType SharedsPtrTy,
3448                              const OMPTaskDataTy &Data,
3449                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3450   ASTContext &C = CGF.getContext();
3451   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3452   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3453   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3454                                  ? OMPD_taskloop
3455                                  : OMPD_task;
3456   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3457   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3458   LValue SrcBase;
3459   bool IsTargetTask =
3460       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3461       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3462   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3463   // PointersArray, SizesArray, and MappersArray. The original variables for
3464   // these arrays are not captured and we get their addresses explicitly.
3465   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3466       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3467     SrcBase = CGF.MakeAddrLValue(
3468         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3469             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3470             CGF.ConvertTypeForMem(SharedsTy)),
3471         SharedsTy);
3472   }
3473   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3474   for (const PrivateDataTy &Pair : Privates) {
3475     // Do not initialize private locals.
3476     if (Pair.second.isLocalPrivate()) {
3477       ++FI;
3478       continue;
3479     }
3480     const VarDecl *VD = Pair.second.PrivateCopy;
3481     const Expr *Init = VD->getAnyInitializer();
3482     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3483                              !CGF.isTrivialInitializer(Init)))) {
3484       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3485       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3486         const VarDecl *OriginalVD = Pair.second.Original;
3487         // Check if the variable is the target-based BasePointersArray,
3488         // PointersArray, SizesArray, or MappersArray.
3489         LValue SharedRefLValue;
3490         QualType Type = PrivateLValue.getType();
3491         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3492         if (IsTargetTask && !SharedField) {
3493           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3494                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3495                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3496                          ->getNumParams() == 0 &&
3497                  isa<TranslationUnitDecl>(
3498                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3499                          ->getDeclContext()) &&
3500                  "Expected artificial target data variable.");
3501           SharedRefLValue =
3502               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3503         } else if (ForDup) {
3504           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3505           SharedRefLValue = CGF.MakeAddrLValue(
3506               SharedRefLValue.getAddress(CGF).withAlignment(
3507                   C.getDeclAlign(OriginalVD)),
3508               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3509               SharedRefLValue.getTBAAInfo());
3510         } else if (CGF.LambdaCaptureFields.count(
3511                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3512                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3513           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3514         } else {
3515           // Processing for implicitly captured variables.
3516           InlinedOpenMPRegionRAII Region(
3517               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3518               /*HasCancel=*/false, /*NoInheritance=*/true);
3519           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3520         }
3521         if (Type->isArrayType()) {
3522           // Initialize firstprivate array.
3523           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3524             // Perform simple memcpy.
3525             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3526           } else {
3527             // Initialize firstprivate array using element-by-element
3528             // initialization.
3529             CGF.EmitOMPAggregateAssign(
3530                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3531                 Type,
3532                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3533                                                   Address SrcElement) {
3534                   // Clean up any temporaries needed by the initialization.
3535                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3536                   InitScope.addPrivate(Elem, SrcElement);
3537                   (void)InitScope.Privatize();
3538                   // Emit initialization for single element.
3539                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3540                       CGF, &CapturesInfo);
3541                   CGF.EmitAnyExprToMem(Init, DestElement,
3542                                        Init->getType().getQualifiers(),
3543                                        /*IsInitializer=*/false);
3544                 });
3545           }
3546         } else {
3547           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3548           InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3549           (void)InitScope.Privatize();
3550           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3551           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3552                              /*capturedByInit=*/false);
3553         }
3554       } else {
3555         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3556       }
3557     }
3558     ++FI;
3559   }
3560 }
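
// E.g. a trivially copyable firstprivate array is initialized above with a
// single aggregate copy (memcpy-like), while a firstprivate array of class
// type with a non-trivial copy constructor is initialized element by element
// via EmitOMPAggregateAssign (see the lambda above).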
3561 
3562 /// Check if duplication function is required for taskloops.
3563 static bool checkInitIsRequired(CodeGenFunction &CGF,
3564                                 ArrayRef<PrivateDataTy> Privates) {
3565   bool InitRequired = false;
3566   for (const PrivateDataTy &Pair : Privates) {
3567     if (Pair.second.isLocalPrivate())
3568       continue;
3569     const VarDecl *VD = Pair.second.PrivateCopy;
3570     const Expr *Init = VD->getAnyInitializer();
3571     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3572                                     !CGF.isTrivialInitializer(Init));
3573     if (InitRequired)
3574       break;
3575   }
3576   return InitRequired;
3577 }
3578 
3580 /// Emit task_dup function (for initialization of
3581 /// private/firstprivate/lastprivate vars and last_iter flag)
3582 /// \code
3583 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3584 /// lastpriv) {
3585 /// // setup lastprivate flag
3586 ///    task_dst->last = lastpriv;
3587 /// // could be constructor calls here...
3588 /// }
3589 /// \endcode
3590 static llvm::Value *
3591 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3592                     const OMPExecutableDirective &D,
3593                     QualType KmpTaskTWithPrivatesPtrQTy,
3594                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3595                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3596                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3597                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3598   ASTContext &C = CGM.getContext();
3599   FunctionArgList Args;
3600   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3601                            KmpTaskTWithPrivatesPtrQTy,
3602                            ImplicitParamDecl::Other);
3603   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3604                            KmpTaskTWithPrivatesPtrQTy,
3605                            ImplicitParamDecl::Other);
3606   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3607                                 ImplicitParamDecl::Other);
3608   Args.push_back(&DstArg);
3609   Args.push_back(&SrcArg);
3610   Args.push_back(&LastprivArg);
3611   const auto &TaskDupFnInfo =
3612       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3613   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3614   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3615   auto *TaskDup = llvm::Function::Create(
3616       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3617   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3618   TaskDup->setDoesNotRecurse();
3619   CodeGenFunction CGF(CGM);
3620   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3621                     Loc);
3622 
3623   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3624       CGF.GetAddrOfLocalVar(&DstArg),
3625       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3626   // task_dst->liter = lastpriv;
3627   if (WithLastIter) {
3628     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3629     LValue Base = CGF.EmitLValueForField(
3630         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3631     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3632     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3633         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3634     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3635   }
3636 
3637   // Emit initial values for private copies (if any).
3638   assert(!Privates.empty());
3639   Address KmpTaskSharedsPtr = Address::invalid();
3640   if (!Data.FirstprivateVars.empty()) {
3641     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3642         CGF.GetAddrOfLocalVar(&SrcArg),
3643         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3644     LValue Base = CGF.EmitLValueForField(
3645         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3646     KmpTaskSharedsPtr = Address(
3647         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3648                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3649                                                   KmpTaskTShareds)),
3650                              Loc),
3651         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3652   }
3653   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3654                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3655   CGF.FinishFunction();
3656   return TaskDup;
3657 }
3658 
3659 /// Checks if destructor function is required to be generated.
3660 /// \return true if cleanups are required, false otherwise.
3661 static bool
3662 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3663                          ArrayRef<PrivateDataTy> Privates) {
3664   for (const PrivateDataTy &P : Privates) {
3665     if (P.second.isLocalPrivate())
3666       continue;
3667     QualType Ty = P.second.Original->getType().getNonReferenceType();
3668     if (Ty.isDestructedType())
3669       return true;
3670   }
3671   return false;
3672 }
3673 
3674 namespace {
3675 /// Loop generator for OpenMP iterator expression.
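/// For each iterator the constructor and the destructor together emit a
/// counter-based loop, roughly (a sketch; the block names match those created
/// below):
/// \code
/// counter = 0;
/// iter.cont:
///   if (!(counter < upper)) goto iter.exit;
/// iter.body:
///   iter_i = begin_i + counter * step_i; // HelperData.Update
///   <code emitted while the scope is active>
///   counter = counter + 1;               // HelperData.CounterUpdate
///   goto iter.cont;
/// iter.exit:
/// \endcode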
3676 class OMPIteratorGeneratorScope final
3677     : public CodeGenFunction::OMPPrivateScope {
3678   CodeGenFunction &CGF;
3679   const OMPIteratorExpr *E = nullptr;
3680   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3681   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3682   OMPIteratorGeneratorScope() = delete;
3683   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3684 
3685 public:
3686   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3687       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3688     if (!E)
3689       return;
3690     SmallVector<llvm::Value *, 4> Uppers;
3691     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3692       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3693       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3694       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3695       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3696       addPrivate(
3697           HelperData.CounterVD,
3698           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3699     }
3700     Privatize();
3701 
3702     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3703       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3704       LValue CLVal =
3705           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3706                              HelperData.CounterVD->getType());
3707       // Counter = 0;
3708       CGF.EmitStoreOfScalar(
3709           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3710           CLVal);
3711       CodeGenFunction::JumpDest &ContDest =
3712           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3713       CodeGenFunction::JumpDest &ExitDest =
3714           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of-iterations>;
3716       llvm::Value *N = Uppers[I];
3717       // cont:
3718       // if (Counter < N) goto body; else goto exit;
3719       CGF.EmitBlock(ContDest.getBlock());
3720       auto *CVal =
3721           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3722       llvm::Value *Cmp =
3723           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3724               ? CGF.Builder.CreateICmpSLT(CVal, N)
3725               : CGF.Builder.CreateICmpULT(CVal, N);
3726       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3727       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3728       // body:
3729       CGF.EmitBlock(BodyBB);
3730       // Iteri = Begini + Counter * Stepi;
3731       CGF.EmitIgnoredExpr(HelperData.Update);
3732     }
3733   }
3734   ~OMPIteratorGeneratorScope() {
3735     if (!E)
3736       return;
3737     for (unsigned I = E->numOfIterators(); I > 0; --I) {
3738       // Counter = Counter + 1;
3739       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3740       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3741       // goto cont;
3742       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3743       // exit:
3744       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3745     }
3746   }
3747 };
3748 } // namespace
3749 
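/// Computes the base address and the size in bytes of the storage denoted by
/// a depend/affinity list item. Rough sketch of the three forms handled
/// (names illustrative):
/// \code
/// ([n][m])ptr -> (ptr,      n * m * sizeof(*ptr))  // array shaping
/// arr[lo:len] -> (&arr[lo], (char *)(&arr[up] + 1) - (char *)&arr[lo])
/// x           -> (&x,       sizeof(x))             // plain lvalue
/// \endcode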
3750 static std::pair<llvm::Value *, llvm::Value *>
3751 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3752   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3753   llvm::Value *Addr;
3754   if (OASE) {
3755     const Expr *Base = OASE->getBase();
3756     Addr = CGF.EmitScalarExpr(Base);
3757   } else {
3758     Addr = CGF.EmitLValue(E).getPointer(CGF);
3759   }
3760   llvm::Value *SizeVal;
3761   QualType Ty = E->getType();
3762   if (OASE) {
3763     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3764     for (const Expr *SE : OASE->getDimensions()) {
3765       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3766       Sz = CGF.EmitScalarConversion(
3767           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3768       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3769     }
3770   } else if (const auto *ASE =
3771                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3772     LValue UpAddrLVal =
3773         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3774     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3775     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3776         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3777     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3778     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3779     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3780   } else {
3781     SizeVal = CGF.getTypeSize(Ty);
3782   }
3783   return std::make_pair(Addr, SizeVal);
3784 }
3785 
/// Builds the kmp_task_affinity_info_t record type, if it is not built yet,
/// and computes the 32-bit unsigned flags type.
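/// A rough sketch of the record built here (field names are illustrative; the
/// authoritative definition is kmp_task_affinity_info in the runtime's
/// kmp.h):
/// \code
/// struct kmp_task_affinity_info_t {
///   intptr_t base_addr;
///   size_t len;
///   uint32_t flags;
/// };
/// \endcode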
3787 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3788   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3789   if (KmpTaskAffinityInfoTy.isNull()) {
3790     RecordDecl *KmpAffinityInfoRD =
3791         C.buildImplicitRecord("kmp_task_affinity_info_t");
3792     KmpAffinityInfoRD->startDefinition();
3793     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3794     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3795     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3796     KmpAffinityInfoRD->completeDefinition();
3797     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3798   }
3799 }
3800 
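/// Emits the allocation and setup of a task object for a task-based
/// directive. The generated code is roughly (a sketch; argument lists
/// abridged, names illustrative):
/// \code
/// kmp_task_t *new_task = __kmpc_omp_task_alloc(
///     &loc, gtid, flags, sizeof(kmp_task_t_with_privates), sizeof(shareds),
///     &.omp_task_entry.);
/// memcpy(new_task->shareds, &<captured shareds>, sizeof(shareds));
/// // ... initialize privates, the destructors pointer and the priority ...
/// \endcode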
3801 CGOpenMPRuntime::TaskResultTy
3802 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3803                               const OMPExecutableDirective &D,
3804                               llvm::Function *TaskFunction, QualType SharedsTy,
3805                               Address Shareds, const OMPTaskDataTy &Data) {
3806   ASTContext &C = CGM.getContext();
3807   llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by alignment, in decreasing order.
3809   const auto *I = Data.PrivateCopies.begin();
3810   for (const Expr *E : Data.PrivateVars) {
3811     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3812     Privates.emplace_back(
3813         C.getDeclAlign(VD),
3814         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3815                          /*PrivateElemInit=*/nullptr));
3816     ++I;
3817   }
3818   I = Data.FirstprivateCopies.begin();
3819   const auto *IElemInitRef = Data.FirstprivateInits.begin();
3820   for (const Expr *E : Data.FirstprivateVars) {
3821     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3822     Privates.emplace_back(
3823         C.getDeclAlign(VD),
3824         PrivateHelpersTy(
3825             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3826             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3827     ++I;
3828     ++IElemInitRef;
3829   }
3830   I = Data.LastprivateCopies.begin();
3831   for (const Expr *E : Data.LastprivateVars) {
3832     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3833     Privates.emplace_back(
3834         C.getDeclAlign(VD),
3835         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3836                          /*PrivateElemInit=*/nullptr));
3837     ++I;
3838   }
3839   for (const VarDecl *VD : Data.PrivateLocals) {
3840     if (isAllocatableDecl(VD))
3841       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3842     else
3843       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3844   }
3845   llvm::stable_sort(Privates,
3846                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
3847                       return L.first > R.first;
3848                     });
3849   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3850   // Build type kmp_routine_entry_t (if not built yet).
3851   emitKmpRoutineEntryT(KmpInt32Ty);
3852   // Build type kmp_task_t (if not built yet).
3853   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3854     if (SavedKmpTaskloopTQTy.isNull()) {
3855       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3856           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3857     }
3858     KmpTaskTQTy = SavedKmpTaskloopTQTy;
3859   } else {
3860     assert((D.getDirectiveKind() == OMPD_task ||
3861             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3862             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3863            "Expected taskloop, task or target directive");
3864     if (SavedKmpTaskTQTy.isNull()) {
3865       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3866           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3867     }
3868     KmpTaskTQTy = SavedKmpTaskTQTy;
3869   }
3870   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3871   // Build particular struct kmp_task_t for the given task.
3872   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3873       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3874   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3875   QualType KmpTaskTWithPrivatesPtrQTy =
3876       C.getPointerType(KmpTaskTWithPrivatesQTy);
3877   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3878   llvm::Type *KmpTaskTWithPrivatesPtrTy =
3879       KmpTaskTWithPrivatesTy->getPointerTo();
3880   llvm::Value *KmpTaskTWithPrivatesTySize =
3881       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3882   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3883 
  // Build the function that maps the privates in the task descriptor to their
  // addresses (if there are any privates).
3885   llvm::Value *TaskPrivatesMap = nullptr;
3886   llvm::Type *TaskPrivatesMapTy =
3887       std::next(TaskFunction->arg_begin(), 3)->getType();
3888   if (!Privates.empty()) {
3889     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3890     TaskPrivatesMap =
3891         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3892     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3893         TaskPrivatesMap, TaskPrivatesMapTy);
3894   } else {
3895     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3896         cast<llvm::PointerType>(TaskPrivatesMapTy));
3897   }
3898   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3899   // kmp_task_t *tt);
3900   llvm::Function *TaskEntry = emitProxyTaskFunction(
3901       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3902       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3903       TaskPrivatesMap);
3904 
3905   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3906   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3907   // kmp_routine_entry_t *task_entry);
3908   // Task flags. Format is taken from
3909   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3910   // description of kmp_tasking_flags struct.
3911   enum {
3912     TiedFlag = 0x1,
3913     FinalFlag = 0x2,
3914     DestructorsFlag = 0x8,
3915     PriorityFlag = 0x20,
3916     DetachableFlag = 0x40,
3917   };
3918   unsigned Flags = Data.Tied ? TiedFlag : 0;
3919   bool NeedsCleanup = false;
3920   if (!Privates.empty()) {
3921     NeedsCleanup =
3922         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3923     if (NeedsCleanup)
3924       Flags = Flags | DestructorsFlag;
3925   }
3926   if (Data.Priority.getInt())
3927     Flags = Flags | PriorityFlag;
3928   if (D.hasClausesOfKind<OMPDetachClause>())
3929     Flags = Flags | DetachableFlag;
3930   llvm::Value *TaskFlags =
3931       Data.Final.getPointer()
3932           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3933                                      CGF.Builder.getInt32(FinalFlag),
3934                                      CGF.Builder.getInt32(/*C=*/0))
3935           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3936   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3937   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3938   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3939       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3940       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3941           TaskEntry, KmpRoutineEntryPtrTy)};
3942   llvm::Value *NewTask;
3943   if (D.hasClausesOfKind<OMPNowaitClause>()) {
3944     // Check if we have any device clause associated with the directive.
3945     const Expr *Device = nullptr;
3946     if (auto *C = D.getSingleClause<OMPDeviceClause>())
3947       Device = C->getDevice();
    // Emit the device ID if present; otherwise use the default value.
3949     llvm::Value *DeviceID;
3950     if (Device)
3951       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3952                                            CGF.Int64Ty, /*isSigned=*/true);
3953     else
3954       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3955     AllocArgs.push_back(DeviceID);
3956     NewTask = CGF.EmitRuntimeCall(
3957         OMPBuilder.getOrCreateRuntimeFunction(
3958             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3959         AllocArgs);
3960   } else {
3961     NewTask =
3962         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3963                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3964                             AllocArgs);
3965   }
3966   // Emit detach clause initialization.
3967   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3968   // task_descriptor);
3969   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3970     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3971     LValue EvtLVal = CGF.EmitLValue(Evt);
3972 
3973     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3974     // int gtid, kmp_task_t *task);
3975     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3976     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3977     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3978     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3979         OMPBuilder.getOrCreateRuntimeFunction(
3980             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3981         {Loc, Tid, NewTask});
3982     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3983                                       Evt->getExprLoc());
3984     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3985   }
3986   // Process affinity clauses.
3987   if (D.hasClausesOfKind<OMPAffinityClause>()) {
3988     // Process list of affinity data.
3989     ASTContext &C = CGM.getContext();
3990     Address AffinitiesArray = Address::invalid();
3991     // Calculate number of elements to form the array of affinity data.
3992     llvm::Value *NumOfElements = nullptr;
3993     unsigned NumAffinities = 0;
3994     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3995       if (const Expr *Modifier = C->getModifier()) {
3996         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3997         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3998           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3999           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4000           NumOfElements =
4001               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4002         }
4003       } else {
4004         NumAffinities += C->varlist_size();
4005       }
4006     }
4007     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in the kmp_task_affinity_info record.
4009     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4010 
4011     QualType KmpTaskAffinityInfoArrayTy;
4012     if (NumOfElements) {
4013       NumOfElements = CGF.Builder.CreateNUWAdd(
4014           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4015       auto *OVE = new (C) OpaqueValueExpr(
4016           Loc,
4017           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4018           VK_PRValue);
4019       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4020                                                     RValue::get(NumOfElements));
4021       KmpTaskAffinityInfoArrayTy =
4022           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4023                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4024       // Properly emit variable-sized array.
4025       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4026                                            ImplicitParamDecl::Other);
4027       CGF.EmitVarDecl(*PD);
4028       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4029       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4030                                                 /*isSigned=*/false);
4031     } else {
4032       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4033           KmpTaskAffinityInfoTy,
4034           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4035           ArrayType::Normal, /*IndexTypeQuals=*/0);
4036       AffinitiesArray =
4037           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4038       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4039       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4040                                              /*isSigned=*/false);
4041     }
4042 
4043     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the affinity items that have no iterator modifier.
4045     unsigned Pos = 0;
4046     bool HasIterator = false;
4047     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4048       if (C->getModifier()) {
4049         HasIterator = true;
4050         continue;
4051       }
4052       for (const Expr *E : C->varlists()) {
4053         llvm::Value *Addr;
4054         llvm::Value *Size;
4055         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4056         LValue Base =
4057             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4058                                KmpTaskAffinityInfoTy);
4059         // affs[i].base_addr = &<Affinities[i].second>;
4060         LValue BaseAddrLVal = CGF.EmitLValueForField(
4061             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4062         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4063                               BaseAddrLVal);
4064         // affs[i].len = sizeof(<Affinities[i].second>);
4065         LValue LenLVal = CGF.EmitLValueForField(
4066             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4067         CGF.EmitStoreOfScalar(Size, LenLVal);
4068         ++Pos;
4069       }
4070     }
4071     LValue PosLVal;
4072     if (HasIterator) {
4073       PosLVal = CGF.MakeAddrLValue(
4074           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4075           C.getSizeType());
4076       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4077     }
4078     // Process elements with iterators.
4079     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4080       const Expr *Modifier = C->getModifier();
4081       if (!Modifier)
4082         continue;
4083       OMPIteratorGeneratorScope IteratorScope(
4084           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4085       for (const Expr *E : C->varlists()) {
4086         llvm::Value *Addr;
4087         llvm::Value *Size;
4088         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4089         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4090         LValue Base = CGF.MakeAddrLValue(
4091             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4092         // affs[i].base_addr = &<Affinities[i].second>;
4093         LValue BaseAddrLVal = CGF.EmitLValueForField(
4094             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4095         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4096                               BaseAddrLVal);
4097         // affs[i].len = sizeof(<Affinities[i].second>);
4098         LValue LenLVal = CGF.EmitLValueForField(
4099             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4100         CGF.EmitStoreOfScalar(Size, LenLVal);
4101         Idx = CGF.Builder.CreateNUWAdd(
4102             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4103         CGF.EmitStoreOfScalar(Idx, PosLVal);
4104       }
4105     }
4106     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4107     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4108     // naffins, kmp_task_affinity_info_t *affin_list);
4109     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4110     llvm::Value *GTid = getThreadID(CGF, Loc);
4111     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4112         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the call and ignore its result for now, until the runtime
    // function is properly implemented.
4115     (void)CGF.EmitRuntimeCall(
4116         OMPBuilder.getOrCreateRuntimeFunction(
4117             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4118         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4119   }
4120   llvm::Value *NewTaskNewTaskTTy =
4121       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4122           NewTask, KmpTaskTWithPrivatesPtrTy);
4123   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4124                                                KmpTaskTWithPrivatesQTy);
4125   LValue TDBase =
4126       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4127   // Fill the data in the resulting kmp_task_t record.
4128   // Copy shareds if there are any.
4129   Address KmpTaskSharedsPtr = Address::invalid();
4130   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4131     KmpTaskSharedsPtr = Address(
4132         CGF.EmitLoadOfScalar(
4133             CGF.EmitLValueForField(
4134                 TDBase,
4135                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4136             Loc),
4137         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4138     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4139     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4140     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4141   }
4142   // Emit initial values for private copies (if any).
4143   TaskResultTy Result;
4144   if (!Privates.empty()) {
4145     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4146                      SharedsTy, SharedsPtrTy, Data, Privates,
4147                      /*ForDup=*/false);
4148     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4149         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4150       Result.TaskDupFn = emitTaskDupFunction(
4151           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4152           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4153           /*WithLastIter=*/!Data.LastprivateVars.empty());
4154     }
4155   }
4156   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4157   enum { Priority = 0, Destructors = 1 };
4158   // Provide pointer to function with destructors for privates.
4159   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4160   const RecordDecl *KmpCmplrdataUD =
4161       (*FI)->getType()->getAsUnionType()->getDecl();
4162   if (NeedsCleanup) {
4163     llvm::Value *DestructorFn = emitDestructorsFunction(
4164         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4165         KmpTaskTWithPrivatesQTy);
4166     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4167     LValue DestructorsLV = CGF.EmitLValueForField(
4168         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4169     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4170                               DestructorFn, KmpRoutineEntryPtrTy),
4171                           DestructorsLV);
4172   }
4173   // Set priority.
4174   if (Data.Priority.getInt()) {
4175     LValue Data2LV = CGF.EmitLValueForField(
4176         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4177     LValue PriorityLV = CGF.EmitLValueForField(
4178         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4179     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4180   }
4181   Result.NewTask = NewTask;
4182   Result.TaskEntry = TaskEntry;
4183   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4184   Result.TDBase = TDBase;
4185   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4186   return Result;
4187 }
4188 
4189 /// Translates internal dependency kind into the runtime kind.
4190 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4191   RTLDependenceKindTy DepKind;
4192   switch (K) {
4193   case OMPC_DEPEND_in:
4194     DepKind = RTLDependenceKindTy::DepIn;
4195     break;
4196   // Out and InOut dependencies must use the same code.
4197   case OMPC_DEPEND_out:
4198   case OMPC_DEPEND_inout:
4199     DepKind = RTLDependenceKindTy::DepInOut;
4200     break;
4201   case OMPC_DEPEND_mutexinoutset:
4202     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4203     break;
4204   case OMPC_DEPEND_inoutset:
4205     DepKind = RTLDependenceKindTy::DepInOutSet;
4206     break;
4207   case OMPC_DEPEND_outallmemory:
4208     DepKind = RTLDependenceKindTy::DepOmpAllMem;
4209     break;
4210   case OMPC_DEPEND_source:
4211   case OMPC_DEPEND_sink:
4212   case OMPC_DEPEND_depobj:
4213   case OMPC_DEPEND_inoutallmemory:
4214   case OMPC_DEPEND_unknown:
4215     llvm_unreachable("Unknown task dependence type");
4216   }
4217   return DepKind;
4218 }
4219 
4220 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
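/// A rough sketch of the record built here (field names are illustrative; the
/// authoritative definition is kmp_depend_info in the runtime's kmp.h):
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr;
///   size_t len;
///   uint8_t flags; // bit width matches that of 'bool'
/// };
/// \endcode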
4221 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4222                            QualType &FlagsTy) {
4223   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4224   if (KmpDependInfoTy.isNull()) {
4225     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4226     KmpDependInfoRD->startDefinition();
4227     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4228     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4229     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4230     KmpDependInfoRD->completeDefinition();
4231     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4232   }
4233 }
4234 
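/// Returns the number of dependence entries recorded in a depobj together
/// with an lvalue for its first entry. emitDepobjDependClause stores the
/// count just in front of the entries, so conceptually:
/// \code
/// kmp_depend_info *base = *(kmp_depend_info **)depobj;
/// size_t num_deps = base[-1].base_addr;
/// \endcode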
4235 std::pair<llvm::Value *, LValue>
4236 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4237                                    SourceLocation Loc) {
4238   ASTContext &C = CGM.getContext();
4239   QualType FlagsTy;
4240   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4241   RecordDecl *KmpDependInfoRD =
4242       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4243   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4244   LValue Base = CGF.EmitLoadOfPointerLValue(
4245       CGF.Builder.CreateElementBitCast(
4246           DepobjLVal.getAddress(CGF),
4247           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4248       KmpDependInfoPtrTy->castAs<PointerType>());
4249   Address DepObjAddr = CGF.Builder.CreateGEP(
4250       Base.getAddress(CGF),
4251       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4252   LValue NumDepsBase = CGF.MakeAddrLValue(
4253       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr, i.e. the entry count stored in front of
  // the dependence array.
4255   LValue BaseAddrLVal = CGF.EmitLValueForField(
4256       NumDepsBase,
4257       *std::next(KmpDependInfoRD->field_begin(),
4258                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4259   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4260   return std::make_pair(NumDeps, Base);
4261 }
4262 
4263 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4264                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4265                            const OMPTaskDataTy::DependData &Data,
4266                            Address DependenciesArray) {
4267   CodeGenModule &CGM = CGF.CGM;
4268   ASTContext &C = CGM.getContext();
4269   QualType FlagsTy;
4270   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4271   RecordDecl *KmpDependInfoRD =
4272       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4273   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4274 
4275   OMPIteratorGeneratorScope IteratorScope(
4276       CGF, cast_or_null<OMPIteratorExpr>(
4277                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4278                                  : nullptr));
4279   for (const Expr *E : Data.DepExprs) {
4280     llvm::Value *Addr;
4281     llvm::Value *Size;
4282 
4283     // The expression will be a nullptr in the 'omp_all_memory' case.
4284     if (E) {
4285       std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4286       Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4287     } else {
4288       Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4289       Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4290     }
4291     LValue Base;
4292     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4293       Base = CGF.MakeAddrLValue(
4294           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4295     } else {
4296       assert(E && "Expected a non-null expression");
4297       LValue &PosLVal = *Pos.get<LValue *>();
4298       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4299       Base = CGF.MakeAddrLValue(
4300           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4301     }
4302     // deps[i].base_addr = &<Dependencies[i].second>;
4303     LValue BaseAddrLVal = CGF.EmitLValueForField(
4304         Base,
4305         *std::next(KmpDependInfoRD->field_begin(),
4306                    static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4307     CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4308     // deps[i].len = sizeof(<Dependencies[i].second>);
4309     LValue LenLVal = CGF.EmitLValueForField(
4310         Base, *std::next(KmpDependInfoRD->field_begin(),
4311                          static_cast<unsigned int>(RTLDependInfoFields::Len)));
4312     CGF.EmitStoreOfScalar(Size, LenLVal);
4313     // deps[i].flags = <Dependencies[i].first>;
4314     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4315     LValue FlagsLVal = CGF.EmitLValueForField(
4316         Base,
4317         *std::next(KmpDependInfoRD->field_begin(),
4318                    static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4319     CGF.EmitStoreOfScalar(
4320         llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4321         FlagsLVal);
4322     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4323       ++(*P);
4324     } else {
4325       LValue &PosLVal = *Pos.get<LValue *>();
4326       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4327       Idx = CGF.Builder.CreateNUWAdd(Idx,
4328                                      llvm::ConstantInt::get(Idx->getType(), 1));
4329       CGF.EmitStoreOfScalar(Idx, PosLVal);
4330     }
4331   }
4332 }
4333 
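/// Computes, for each depobj expression in the clause (expanding any iterator
/// modifier), the number of entries recorded by emitDepobjDependClause,
/// conceptually:
/// \code
/// sizes[i] = deps_i[-1].base_addr; // entry count stored for depobj i
/// \endcode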
4334 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4335     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4336     const OMPTaskDataTy::DependData &Data) {
4337   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4338          "Expected depobj dependency kind.");
4339   SmallVector<llvm::Value *, 4> Sizes;
4340   SmallVector<LValue, 4> SizeLVals;
4341   ASTContext &C = CGF.getContext();
4342   {
4343     OMPIteratorGeneratorScope IteratorScope(
4344         CGF, cast_or_null<OMPIteratorExpr>(
4345                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4346                                    : nullptr));
4347     for (const Expr *E : Data.DepExprs) {
4348       llvm::Value *NumDeps;
4349       LValue Base;
4350       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4351       std::tie(NumDeps, Base) =
4352           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4353       LValue NumLVal = CGF.MakeAddrLValue(
4354           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4355           C.getUIntPtrType());
4356       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4357                               NumLVal.getAddress(CGF));
4358       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4359       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4360       CGF.EmitStoreOfScalar(Add, NumLVal);
4361       SizeLVals.push_back(NumLVal);
4362     }
4363   }
4364   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4365     llvm::Value *Size =
4366         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4367     Sizes.push_back(Size);
4368   }
4369   return Sizes;
4370 }
4371 
4372 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4373                                          QualType &KmpDependInfoTy,
4374                                          LValue PosLVal,
4375                                          const OMPTaskDataTy::DependData &Data,
4376                                          Address DependenciesArray) {
4377   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4378          "Expected depobj dependency kind.");
4379   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4380   {
4381     OMPIteratorGeneratorScope IteratorScope(
4382         CGF, cast_or_null<OMPIteratorExpr>(
4383                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4384                                    : nullptr));
4385     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4386       const Expr *E = Data.DepExprs[I];
4387       llvm::Value *NumDeps;
4388       LValue Base;
4389       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4390       std::tie(NumDeps, Base) =
4391           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4392 
      // Copy the dependency data via memcpy.
4394       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4395           ElSize,
4396           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4397       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4398       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4399       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4400 
      // Increase pos.
      // pos += numDeps;
4403       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4404       CGF.EmitStoreOfScalar(Add, PosLVal);
4405     }
4406   }
4407 }
4408 
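/// Emits the combined dependence array for a set of depend clauses. The array
/// is filled in three passes, roughly (a sketch):
/// \code
/// kmp_depend_info deps[num_elements];
/// // 1) plain dependences, written at compile-time-known positions;
/// // 2) dependences under an iterator modifier, advancing a runtime counter;
/// // 3) the contents of depobj dependence lists, copied in with memcpy.
/// \endcode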
4409 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4410     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4411     SourceLocation Loc) {
4412   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4413         return D.DepExprs.empty();
4414       }))
4415     return std::make_pair(nullptr, Address::invalid());
4416   // Process list of dependencies.
4417   ASTContext &C = CGM.getContext();
4418   Address DependenciesArray = Address::invalid();
4419   llvm::Value *NumOfElements = nullptr;
4420   unsigned NumDependencies = std::accumulate(
4421       Dependencies.begin(), Dependencies.end(), 0,
4422       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4423         return D.DepKind == OMPC_DEPEND_depobj
4424                    ? V
4425                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4426       });
4427   QualType FlagsTy;
4428   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4429   bool HasDepobjDeps = false;
4430   bool HasRegularWithIterators = false;
4431   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4432   llvm::Value *NumOfRegularWithIterators =
4433       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate the number of depobj dependencies and regular deps with
  // iterators.
4436   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4437     if (D.DepKind == OMPC_DEPEND_depobj) {
4438       SmallVector<llvm::Value *, 4> Sizes =
4439           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4440       for (llvm::Value *Size : Sizes) {
4441         NumOfDepobjElements =
4442             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4443       }
4444       HasDepobjDeps = true;
4445       continue;
4446     }
    // Include the number of iterations, if any.
4449     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4450       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4451         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4452         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4453         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4454             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4455         NumOfRegularWithIterators =
4456             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4457       }
4458       HasRegularWithIterators = true;
4459       continue;
4460     }
4461   }
4462 
4463   QualType KmpDependInfoArrayTy;
4464   if (HasDepobjDeps || HasRegularWithIterators) {
4465     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4466                                            /*isSigned=*/false);
4467     if (HasDepobjDeps) {
4468       NumOfElements =
4469           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4470     }
4471     if (HasRegularWithIterators) {
4472       NumOfElements =
4473           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4474     }
4475     auto *OVE = new (C) OpaqueValueExpr(
4476         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4477         VK_PRValue);
4478     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4479                                                   RValue::get(NumOfElements));
4480     KmpDependInfoArrayTy =
4481         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4482                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4484     // Properly emit variable-sized array.
4485     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4486                                          ImplicitParamDecl::Other);
4487     CGF.EmitVarDecl(*PD);
4488     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4489     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4490                                               /*isSigned=*/false);
4491   } else {
4492     KmpDependInfoArrayTy = C.getConstantArrayType(
4493         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4494         ArrayType::Normal, /*IndexTypeQuals=*/0);
4495     DependenciesArray =
4496         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4497     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4498     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4499                                            /*isSigned=*/false);
4500   }
4501   unsigned Pos = 0;
4502   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4503     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4504         Dependencies[I].IteratorExpr)
4505       continue;
4506     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4507                    DependenciesArray);
4508   }
4509   // Copy regular dependencies with iterators.
4510   LValue PosLVal = CGF.MakeAddrLValue(
4511       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4512   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4513   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4514     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4515         !Dependencies[I].IteratorExpr)
4516       continue;
4517     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4518                    DependenciesArray);
4519   }
  // Finally, copy the contents of the depobj dependence lists.
4521   if (HasDepobjDeps) {
4522     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4523       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4524         continue;
4525       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4526                          DependenciesArray);
4527     }
4528   }
4529   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4530       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4531   return std::make_pair(NumOfElements, DependenciesArray);
4532 }
4533 
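/// Allocates and fills the dependence array backing a depobj. Conceptual
/// layout (a sketch; one extra leading element stores the entry count so the
/// depobj(x) update(in) construct can recover it later):
/// \code
/// kmp_depend_info deps[num_deps + 1];
/// deps[0].base_addr = num_deps; // read back as deps[-1] from the base
/// // deps[1] .. deps[num_deps] hold the entries; the returned address
/// // points at deps[1].
/// \endcode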
4534 Address CGOpenMPRuntime::emitDepobjDependClause(
4535     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4536     SourceLocation Loc) {
4537   if (Dependencies.DepExprs.empty())
4538     return Address::invalid();
4539   // Process list of dependencies.
4540   ASTContext &C = CGM.getContext();
4541   Address DependenciesArray = Address::invalid();
4542   unsigned NumDependencies = Dependencies.DepExprs.size();
4543   QualType FlagsTy;
4544   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4545   RecordDecl *KmpDependInfoRD =
4546       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4547 
4548   llvm::Value *Size;
  // Define the type kmp_depend_info[<Dependencies.size() + 1>]; for a depobj,
  // one extra element is reserved to store the number of elements. This is
  // required to handle the depobj(x) update(in) construct.
  // kmp_depend_info deps[<Dependencies.size() + 1>];
4553   llvm::Value *NumDepsVal;
4554   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4555   if (const auto *IE =
4556           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4557     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4558     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4559       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4560       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4561       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4562     }
4563     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4564                                     NumDepsVal);
4565     CharUnits SizeInBytes =
4566         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4567     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4568     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4569     NumDepsVal =
4570         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4571   } else {
4572     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4573         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4574         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4575     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4576     Size = CGM.getSize(Sz.alignTo(Align));
4577     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4578   }
  // The dependence array for a depobj has to be allocated dynamically.
4580   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4581   // Use default allocator.
4582   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4583   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4584 
4585   llvm::Value *Addr =
4586       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4587                               CGM.getModule(), OMPRTL___kmpc_alloc),
4588                           Args, ".dep.arr.addr");
4589   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4590   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4591       Addr, KmpDependInfoLlvmTy->getPointerTo());
4592   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write the number of elements in the first element of the array for
  // depobj.
4594   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4595   // deps[i].base_addr = NumDependencies;
4596   LValue BaseAddrLVal = CGF.EmitLValueForField(
4597       Base,
4598       *std::next(KmpDependInfoRD->field_begin(),
4599                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4600   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4601   llvm::PointerUnion<unsigned *, LValue *> Pos;
4602   unsigned Idx = 1;
4603   LValue PosLVal;
4604   if (Dependencies.IteratorExpr) {
4605     PosLVal = CGF.MakeAddrLValue(
4606         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4607         C.getSizeType());
4608     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4609                           /*IsInit=*/true);
4610     Pos = &PosLVal;
4611   } else {
4612     Pos = &Idx;
4613   }
4614   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4615   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4616       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4617       CGF.Int8Ty);
4618   return DependenciesArray;
4619 }
4620 
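/// Emits the 'destroy' handling for a depobj: frees the allocation made by
/// emitDepobjDependClause, roughly:
/// \code
/// __kmpc_free(gtid, (kmp_depend_info *)(*depobj) - 1,
///             /*allocator=*/nullptr); // -1 skips back to the count element
/// \endcode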
4621 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4622                                         SourceLocation Loc) {
4623   ASTContext &C = CGM.getContext();
4624   QualType FlagsTy;
4625   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4626   LValue Base = CGF.EmitLoadOfPointerLValue(
4627       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4628   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4629   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4630       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4631       CGF.ConvertTypeForMem(KmpDependInfoTy));
4632   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4633       Addr.getElementType(), Addr.getPointer(),
4634       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4635   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4636                                                                CGF.VoidPtrTy);
4637   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4638   // Use default allocator.
4639   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4640   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4641 
  // __kmpc_free(gtid, addr, nullptr);
4643   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4644                                 CGM.getModule(), OMPRTL___kmpc_free),
4645                             Args);
4646 }
4647 
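/// Emits the 'update(<kind>)' handling for a depobj: a loop rewriting the
/// flags of every recorded entry, roughly:
/// \code
/// kmp_depend_info *begin = <entries of depobj>;
/// kmp_depend_info *end = begin + begin[-1].base_addr;
/// for (kmp_depend_info *it = begin; it != end; ++it)
///   it->flags = <new dependence kind>;
/// \endcode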
4648 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4649                                        OpenMPDependClauseKind NewDepKind,
4650                                        SourceLocation Loc) {
4651   ASTContext &C = CGM.getContext();
4652   QualType FlagsTy;
4653   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4654   RecordDecl *KmpDependInfoRD =
4655       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4656   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4657   llvm::Value *NumDeps;
4658   LValue Base;
4659   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4660 
4661   Address Begin = Base.getAddress(CGF);
  // Compute the pointer past the last element: End = Begin + NumDeps.
4663   llvm::Value *End = CGF.Builder.CreateGEP(
4664       Begin.getElementType(), Begin.getPointer(), NumDeps);
4665   // The basic structure here is a while-do loop.
4666   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4667   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4668   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4669   CGF.EmitBlock(BodyBB);
4670   llvm::PHINode *ElementPHI =
4671       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4672   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4673   Begin = Begin.withPointer(ElementPHI);
4674   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4675                             Base.getTBAAInfo());
4676   // deps[i].flags = NewDepKind;
4677   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4678   LValue FlagsLVal = CGF.EmitLValueForField(
4679       Base, *std::next(KmpDependInfoRD->field_begin(),
4680                        static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4681   CGF.EmitStoreOfScalar(
4682       llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4683       FlagsLVal);
4684 
4685   // Shift the address forward by one element.
4686   Address ElementNext =
4687       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4688   ElementPHI->addIncoming(ElementNext.getPointer(),
4689                           CGF.Builder.GetInsertBlock());
4690   llvm::Value *IsEmpty =
4691       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4692   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4693   // Done.
4694   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4695 }
4696 
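/// Emits the actual task launch, honoring an 'if' clause if present. Sketch
/// of the generated code (argument lists abridged):
/// \code
/// if (<IfCond>) { // taken unconditionally when there is no if clause
///   __kmpc_omp_task(&loc, gtid, new_task); // _with_deps if dependences
/// } else {
///   __kmpc_omp_taskwait_deps_51(...); // only if there are dependences
///   __kmpc_omp_task_begin_if0(&loc, gtid, new_task);
///   .omp_task_entry.(gtid, new_task);
///   __kmpc_omp_task_complete_if0(&loc, gtid, new_task);
/// }
/// \endcode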
4697 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4698                                    const OMPExecutableDirective &D,
4699                                    llvm::Function *TaskFunction,
4700                                    QualType SharedsTy, Address Shareds,
4701                                    const Expr *IfCond,
4702                                    const OMPTaskDataTy &Data) {
4703   if (!CGF.HaveInsertPoint())
4704     return;
4705 
4706   TaskResultTy Result =
4707       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4708   llvm::Value *NewTask = Result.NewTask;
4709   llvm::Function *TaskEntry = Result.TaskEntry;
4710   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4711   LValue TDBase = Result.TDBase;
4712   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4713   // Process list of dependences.
4714   Address DependenciesArray = Address::invalid();
4715   llvm::Value *NumOfElements;
4716   std::tie(NumOfElements, DependenciesArray) =
4717       emitDependClause(CGF, Data.Dependences, Loc);
4718 
4719   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4720   // libcall.
4721   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4722   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
  // dependence list is not empty.
4725   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4726   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4727   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4728   llvm::Value *DepTaskArgs[7];
4729   if (!Data.Dependences.empty()) {
4730     DepTaskArgs[0] = UpLoc;
4731     DepTaskArgs[1] = ThreadID;
4732     DepTaskArgs[2] = NewTask;
4733     DepTaskArgs[3] = NumOfElements;
4734     DepTaskArgs[4] = DependenciesArray.getPointer();
4735     DepTaskArgs[5] = CGF.Builder.getInt32(0);
4736     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4737   }
4738   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4739                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4740     if (!Data.Tied) {
4741       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4742       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4743       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4744     }
4745     if (!Data.Dependences.empty()) {
4746       CGF.EmitRuntimeCall(
4747           OMPBuilder.getOrCreateRuntimeFunction(
4748               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4749           DepTaskArgs);
4750     } else {
4751       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4752                               CGM.getModule(), OMPRTL___kmpc_omp_task),
4753                           TaskArgs);
4754     }
    // Check if the parent region is untied and build a return for the untied
    // task.
4756     if (auto *Region =
4757             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4758       Region->emitUntiedSwitch(CGF);
4759   };
4760 
4761   llvm::Value *DepWaitTaskArgs[7];
4762   if (!Data.Dependences.empty()) {
4763     DepWaitTaskArgs[0] = UpLoc;
4764     DepWaitTaskArgs[1] = ThreadID;
4765     DepWaitTaskArgs[2] = NumOfElements;
4766     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4767     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4768     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4769     DepWaitTaskArgs[6] =
4770         llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4771   }
4772   auto &M = CGM.getModule();
4773   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4774                         TaskEntry, &Data, &DepWaitTaskArgs,
4775                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4776     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait); if
    // dependence info is specified.
4781     if (!Data.Dependences.empty())
4782       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4783                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
4784                           DepWaitTaskArgs);
4785     // Call proxy_task_entry(gtid, new_task);
4786     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4787                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4788       Action.Enter(CGF);
4789       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4790       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4791                                                           OutlinedFnArgs);
4792     };
4793 
4794     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4795     // kmp_task_t *new_task);
4796     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4797     // kmp_task_t *new_task);
4798     RegionCodeGenTy RCG(CodeGen);
4799     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4800                               M, OMPRTL___kmpc_omp_task_begin_if0),
4801                           TaskArgs,
4802                           OMPBuilder.getOrCreateRuntimeFunction(
4803                               M, OMPRTL___kmpc_omp_task_complete_if0),
4804                           TaskArgs);
4805     RCG.setAction(Action);
4806     RCG(CGF);
4807   };
4808 
4809   if (IfCond) {
4810     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4811   } else {
4812     RegionCodeGenTy ThenRCG(ThenCodeGen);
4813     ThenRCG(CGF);
4814   }
4815 }
4816 
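// For '#pragma omp taskloop' and its combined forms this lowers to a single
// runtime call of the form (a sketch; lb, ub and st live inside the
// allocated kmp_task_t and are initialized below from the loop bounds):
//
//   __kmpc_taskloop(&loc, gtid, new_task, if_val, &lb, &ub, st,
//                   /*nogroup=*/1, sched, grainsize, task_dup);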
4817 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4818                                        const OMPLoopDirective &D,
4819                                        llvm::Function *TaskFunction,
4820                                        QualType SharedsTy, Address Shareds,
4821                                        const Expr *IfCond,
4822                                        const OMPTaskDataTy &Data) {
4823   if (!CGF.HaveInsertPoint())
4824     return;
4825   TaskResultTy Result =
4826       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4827   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4828   // libcall.
4829   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4830   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4831   // sched, kmp_uint64 grainsize, void *task_dup);
4832   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4833   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4834   llvm::Value *IfVal;
4835   if (IfCond) {
4836     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4837                                       /*isSigned=*/true);
4838   } else {
4839     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4840   }
4841 
4842   LValue LBLVal = CGF.EmitLValueForField(
4843       Result.TDBase,
4844       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4845   const auto *LBVar =
4846       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4847   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4848                        LBLVal.getQuals(),
4849                        /*IsInitializer=*/true);
4850   LValue UBLVal = CGF.EmitLValueForField(
4851       Result.TDBase,
4852       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4853   const auto *UBVar =
4854       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4855   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4856                        UBLVal.getQuals(),
4857                        /*IsInitializer=*/true);
4858   LValue StLVal = CGF.EmitLValueForField(
4859       Result.TDBase,
4860       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4861   const auto *StVar =
4862       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4863   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4864                        StLVal.getQuals(),
4865                        /*IsInitializer=*/true);
4866   // Store reductions address.
4867   LValue RedLVal = CGF.EmitLValueForField(
4868       Result.TDBase,
4869       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4870   if (Data.Reductions) {
4871     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4872   } else {
4873     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4874                                CGF.getContext().VoidPtrTy);
4875   }
4876   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4877   llvm::Value *TaskArgs[] = {
4878       UpLoc,
4879       ThreadID,
4880       Result.NewTask,
4881       IfVal,
4882       LBLVal.getPointer(CGF),
4883       UBLVal.getPointer(CGF),
4884       CGF.EmitLoadOfScalar(StLVal, Loc),
4885       llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // nogroup: always 1; taskgroup is compiler-emitted.
4887       llvm::ConstantInt::getSigned(
4888           CGF.IntTy, Data.Schedule.getPointer()
4889                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4890                          : NoSchedule),
4891       Data.Schedule.getPointer()
4892           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4893                                       /*isSigned=*/false)
4894           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4895       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4896                              Result.TaskDupFn, CGF.VoidPtrTy)
4897                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4898   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4899                           CGM.getModule(), OMPRTL___kmpc_taskloop),
4900                       TaskArgs);
4901 }
4902 
/// Emit the reduction operation 'LHS op = RHS' for each element of an array
/// (required for array sections).
4905 /// \param Type Type of array.
4906 /// \param LHSVar Variable on the left side of the reduction operation
4907 /// (references element of array in original variable).
4908 /// \param RHSVar Variable on the right side of the reduction operation
4909 /// (references element of array in original variable).
4910 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4911 /// RHSVar.
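///
/// Conceptually, this emits the following loop (a sketch; the element type
/// and combiner depend on the reduction):
/// \code
/// Ty *lhs = LHSBegin, *rhs = RHSBegin, *end = LHSBegin + NumElements;
/// while (lhs != end) {
///   *lhs = RedOp(*lhs, *rhs); // RedOpGen
///   ++lhs; ++rhs;
/// }
/// \endcode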
4912 static void EmitOMPAggregateReduction(
4913     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4914     const VarDecl *RHSVar,
4915     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4916                                   const Expr *, const Expr *)> &RedOpGen,
4917     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4918     const Expr *UpExpr = nullptr) {
4919   // Perform element-by-element initialization.
4920   QualType ElementTy;
4921   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4922   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4923 
4924   // Drill down to the base element type on both arrays.
4925   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4926   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4927 
4928   llvm::Value *RHSBegin = RHSAddr.getPointer();
4929   llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Compute the address one past the last LHS element.
4931   llvm::Value *LHSEnd =
4932       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a do-while loop guarded by an initial
  // emptiness check.
4934   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4935   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4936   llvm::Value *IsEmpty =
4937       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4938   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4939 
4940   // Enter the loop body, making that address the current address.
4941   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4942   CGF.EmitBlock(BodyBB);
4943 
4944   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4945 
4946   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4947       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4948   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4949   Address RHSElementCurrent(
4950       RHSElementPHI, RHSAddr.getElementType(),
4951       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4952 
4953   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4954       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4955   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4956   Address LHSElementCurrent(
4957       LHSElementPHI, LHSAddr.getElementType(),
4958       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4959 
4960   // Emit copy.
4961   CodeGenFunction::OMPPrivateScope Scope(CGF);
4962   Scope.addPrivate(LHSVar, LHSElementCurrent);
4963   Scope.addPrivate(RHSVar, RHSElementCurrent);
4964   Scope.Privatize();
4965   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4966   Scope.ForceCleanup();
4967 
4968   // Shift the address forward by one element.
4969   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4970       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4971       "omp.arraycpy.dest.element");
4972   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4973       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4974       "omp.arraycpy.src.element");
4975   // Check whether we've reached the end.
4976   llvm::Value *Done =
4977       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4978   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4979   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4980   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4981 
4982   // Done.
4983   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4984 }
4985 
/// Emit reduction combiner. If the combiner is a simple expression, emit it
/// as is; otherwise treat it as the combiner of a user-defined reduction
/// (UDR) decl and emit it as a call to the UDR combiner function.
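/// For a user-defined reduction, ReductionOp is a call whose callee is an
/// OpaqueValueExpr referencing the OMPDeclareReductionDecl; that opaque value
/// is remapped to the combiner function emitted for the declaration, and the
/// call is then emitted.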
4989 static void emitReductionCombiner(CodeGenFunction &CGF,
4990                                   const Expr *ReductionOp) {
4991   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4992     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4993       if (const auto *DRE =
4994               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4995         if (const auto *DRD =
4996                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4997           std::pair<llvm::Function *, llvm::Function *> Reduction =
4998               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4999           RValue Func = RValue::get(Reduction.first);
5000           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5001           CGF.EmitIgnoredExpr(ReductionOp);
5002           return;
5003         }
5004   CGF.EmitIgnoredExpr(ReductionOp);
5005 }
5006 
5007 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5008     SourceLocation Loc, llvm::Type *ArgsElemType,
5009     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5010     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5011   ASTContext &C = CGM.getContext();
5012 
5013   // void reduction_func(void *LHSArg, void *RHSArg);
5014   FunctionArgList Args;
5015   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5016                            ImplicitParamDecl::Other);
5017   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5018                            ImplicitParamDecl::Other);
5019   Args.push_back(&LHSArg);
5020   Args.push_back(&RHSArg);
5021   const auto &CGFI =
5022       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5023   std::string Name = getName({"omp", "reduction", "reduction_func"});
5024   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5025                                     llvm::GlobalValue::InternalLinkage, Name,
5026                                     &CGM.getModule());
5027   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5028   Fn->setDoesNotRecurse();
5029   CodeGenFunction CGF(CGM);
5030   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5031 
  // LHS = (void*[n])(LHSArg); // the destination array
  // RHS = (void*[n])(RHSArg); // the source array
5034   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5035                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5036                   ArgsElemType->getPointerTo()),
5037               ArgsElemType, CGF.getPointerAlign());
5038   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5039                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5040                   ArgsElemType->getPointerTo()),
5041               ArgsElemType, CGF.getPointerAlign());
5042 
5043   //  ...
5044   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5045   //  ...
5046   CodeGenFunction::OMPPrivateScope Scope(CGF);
5047   const auto *IPriv = Privates.begin();
5048   unsigned Idx = 0;
5049   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5050     const auto *RHSVar =
5051         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5052     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5053     const auto *LHSVar =
5054         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5055     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5056     QualType PrivTy = (*IPriv)->getType();
5057     if (PrivTy->isVariablyModifiedType()) {
5058       // Get array size and emit VLA type.
5059       ++Idx;
5060       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5061       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5062       const VariableArrayType *VLA =
5063           CGF.getContext().getAsVariableArrayType(PrivTy);
5064       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5065       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5066           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5067       CGF.EmitVariablyModifiedType(PrivTy);
5068     }
5069   }
5070   Scope.Privatize();
5071   IPriv = Privates.begin();
5072   const auto *ILHS = LHSExprs.begin();
5073   const auto *IRHS = RHSExprs.begin();
5074   for (const Expr *E : ReductionOps) {
5075     if ((*IPriv)->getType()->isArrayType()) {
5076       // Emit reduction for array section.
5077       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5078       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5079       EmitOMPAggregateReduction(
5080           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5081           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5082             emitReductionCombiner(CGF, E);
5083           });
5084     } else {
5085       // Emit reduction for array subscript or single variable.
5086       emitReductionCombiner(CGF, E);
5087     }
5088     ++IPriv;
5089     ++ILHS;
5090     ++IRHS;
5091   }
5092   Scope.ForceCleanup();
5093   CGF.FinishFunction();
5094   return Fn;
5095 }
5096 
5097 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5098                                                   const Expr *ReductionOp,
5099                                                   const Expr *PrivateRef,
5100                                                   const DeclRefExpr *LHS,
5101                                                   const DeclRefExpr *RHS) {
5102   if (PrivateRef->getType()->isArrayType()) {
5103     // Emit reduction for array section.
5104     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5105     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5106     EmitOMPAggregateReduction(
5107         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5108         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5109           emitReductionCombiner(CGF, ReductionOp);
5110         });
5111   } else {
5112     // Emit reduction for array subscript or single variable.
5113     emitReductionCombiner(CGF, ReductionOp);
5114   }
5115 }
5116 
5117 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5118                                     ArrayRef<const Expr *> Privates,
5119                                     ArrayRef<const Expr *> LHSExprs,
5120                                     ArrayRef<const Expr *> RHSExprs,
5121                                     ArrayRef<const Expr *> ReductionOps,
5122                                     ReductionOptionsTy Options) {
5123   if (!CGF.HaveInsertPoint())
5124     return;
5125 
5126   bool WithNowait = Options.WithNowait;
5127   bool SimpleReduction = Options.SimpleReduction;
5128 
  // The following code should be emitted for the reduction:
5130   //
5131   // static kmp_critical_name lock = { 0 };
5132   //
5133   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5134   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5135   //  ...
5136   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5137   //  *(Type<n>-1*)rhs[<n>-1]);
5138   // }
5139   //
5140   // ...
5141   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5142   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5143   // RedList, reduce_func, &<lock>)) {
5144   // case 1:
5145   //  ...
5146   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5147   //  ...
5148   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5149   // break;
5150   // case 2:
5151   //  ...
5152   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5153   //  ...
5154   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5155   // break;
5156   // default:;
5157   // }
5158   //
  // If SimpleReduction is true, only the following code is generated:
5160   //  ...
5161   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5162   //  ...
5163 
5164   ASTContext &C = CGM.getContext();
5165 
5166   if (SimpleReduction) {
5167     CodeGenFunction::RunCleanupsScope Scope(CGF);
5168     const auto *IPriv = Privates.begin();
5169     const auto *ILHS = LHSExprs.begin();
5170     const auto *IRHS = RHSExprs.begin();
5171     for (const Expr *E : ReductionOps) {
5172       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5173                                   cast<DeclRefExpr>(*IRHS));
5174       ++IPriv;
5175       ++ILHS;
5176       ++IRHS;
5177     }
5178     return;
5179   }
5180 
5181   // 1. Build a list of reduction variables.
5182   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5183   auto Size = RHSExprs.size();
5184   for (const Expr *E : Privates) {
5185     if (E->getType()->isVariablyModifiedType())
      // Reserve a slot for the array size.
5187       ++Size;
5188   }
  llvm::APInt ArraySize(/*numBits=*/32, Size);
5190   QualType ReductionArrayTy =
5191       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5192                              /*IndexTypeQuals=*/0);
5193   Address ReductionList =
5194       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5195   const auto *IPriv = Privates.begin();
5196   unsigned Idx = 0;
5197   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5198     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5199     CGF.Builder.CreateStore(
5200         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5201             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5202         Elem);
5203     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5204       // Store array size.
5205       ++Idx;
5206       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5207       llvm::Value *Size = CGF.Builder.CreateIntCast(
5208           CGF.getVLASize(
5209                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5210               .NumElts,
5211           CGF.SizeTy, /*isSigned=*/false);
5212       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5213                               Elem);
5214     }
5215   }
5216 
5217   // 2. Emit reduce_func().
5218   llvm::Function *ReductionFn =
5219       emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5220                             Privates, LHSExprs, RHSExprs, ReductionOps);
5221 
5222   // 3. Create static kmp_critical_name lock = { 0 };
5223   std::string Name = getName({"reduction"});
5224   llvm::Value *Lock = getCriticalRegionLock(Name);
5225 
5226   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5227   // RedList, reduce_func, &<lock>);
5228   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5229   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5230   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5231   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5232       ReductionList.getPointer(), CGF.VoidPtrTy);
5233   llvm::Value *Args[] = {
5234       IdentTLoc,                             // ident_t *<loc>
5235       ThreadId,                              // i32 <gtid>
5236       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5237       ReductionArrayTySize,                  // size_type sizeof(RedList)
5238       RL,                                    // void *RedList
5239       ReductionFn, // void (*) (void *, void *) <reduce_func>
5240       Lock         // kmp_critical_name *&<lock>
5241   };
5242   llvm::Value *Res = CGF.EmitRuntimeCall(
5243       OMPBuilder.getOrCreateRuntimeFunction(
5244           CGM.getModule(),
5245           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5246       Args);
5247 
5248   // 5. Build switch(res)
5249   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5250   llvm::SwitchInst *SwInst =
5251       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5252 
5253   // 6. Build case 1:
5254   //  ...
5255   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5256   //  ...
5257   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5258   // break;
5259   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5260   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5261   CGF.EmitBlock(Case1BB);
5262 
5263   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5264   llvm::Value *EndArgs[] = {
5265       IdentTLoc, // ident_t *<loc>
5266       ThreadId,  // i32 <gtid>
5267       Lock       // kmp_critical_name *&<lock>
5268   };
5269   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5270                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5271     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5272     const auto *IPriv = Privates.begin();
5273     const auto *ILHS = LHSExprs.begin();
5274     const auto *IRHS = RHSExprs.begin();
5275     for (const Expr *E : ReductionOps) {
5276       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5277                                      cast<DeclRefExpr>(*IRHS));
5278       ++IPriv;
5279       ++ILHS;
5280       ++IRHS;
5281     }
5282   };
5283   RegionCodeGenTy RCG(CodeGen);
5284   CommonActionTy Action(
5285       nullptr, std::nullopt,
5286       OMPBuilder.getOrCreateRuntimeFunction(
5287           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5288                                       : OMPRTL___kmpc_end_reduce),
5289       EndArgs);
5290   RCG.setAction(Action);
5291   RCG(CGF);
5292 
5293   CGF.EmitBranch(DefaultBB);
5294 
5295   // 7. Build case 2:
5296   //  ...
5297   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5298   //  ...
5299   // break;
5300   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5301   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5302   CGF.EmitBlock(Case2BB);
5303 
5304   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5305                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5306     const auto *ILHS = LHSExprs.begin();
5307     const auto *IRHS = RHSExprs.begin();
5308     const auto *IPriv = Privates.begin();
5309     for (const Expr *E : ReductionOps) {
5310       const Expr *XExpr = nullptr;
5311       const Expr *EExpr = nullptr;
5312       const Expr *UpExpr = nullptr;
5313       BinaryOperatorKind BO = BO_Comma;
5314       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5315         if (BO->getOpcode() == BO_Assign) {
5316           XExpr = BO->getLHS();
5317           UpExpr = BO->getRHS();
5318         }
5319       }
5320       // Try to emit update expression as a simple atomic.
5321       const Expr *RHSExpr = UpExpr;
5322       if (RHSExpr) {
5323         // Analyze RHS part of the whole expression.
5324         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5325                 RHSExpr->IgnoreParenImpCasts())) {
5326           // If this is a conditional operator, analyze its condition for
5327           // min/max reduction operator.
5328           RHSExpr = ACO->getCond();
5329         }
5330         if (const auto *BORHS =
5331                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5332           EExpr = BORHS->getRHS();
5333           BO = BORHS->getOpcode();
5334         }
5335       }
5336       if (XExpr) {
5337         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5338         auto &&AtomicRedGen = [BO, VD,
5339                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5340                                     const Expr *EExpr, const Expr *UpExpr) {
5341           LValue X = CGF.EmitLValue(XExpr);
5342           RValue E;
5343           if (EExpr)
5344             E = CGF.EmitAnyExpr(EExpr);
5345           CGF.EmitOMPAtomicSimpleUpdateExpr(
5346               X, E, BO, /*IsXLHSInRHSPart=*/true,
5347               llvm::AtomicOrdering::Monotonic, Loc,
5348               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5349                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5350                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5351                 CGF.emitOMPSimpleStore(
5352                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5353                     VD->getType().getNonReferenceType(), Loc);
5354                 PrivateScope.addPrivate(VD, LHSTemp);
5355                 (void)PrivateScope.Privatize();
5356                 return CGF.EmitAnyExpr(UpExpr);
5357               });
5358         };
5359         if ((*IPriv)->getType()->isArrayType()) {
5360           // Emit atomic reduction for array section.
5361           const auto *RHSVar =
5362               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5363           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5364                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5365         } else {
5366           // Emit atomic reduction for array subscript or single variable.
5367           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5368         }
5369       } else {
5370         // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
5373           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5374           std::string Name = RT.getName({"atomic_reduction"});
5375           RT.emitCriticalRegion(
5376               CGF, Name,
5377               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5378                 Action.Enter(CGF);
5379                 emitReductionCombiner(CGF, E);
5380               },
5381               Loc);
5382         };
5383         if ((*IPriv)->getType()->isArrayType()) {
5384           const auto *LHSVar =
5385               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5386           const auto *RHSVar =
5387               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5388           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5389                                     CritRedGen);
5390         } else {
5391           CritRedGen(CGF, nullptr, nullptr, nullptr);
5392         }
5393       }
5394       ++ILHS;
5395       ++IRHS;
5396       ++IPriv;
5397     }
5398   };
5399   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5400   if (!WithNowait) {
5401     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5402     llvm::Value *EndArgs[] = {
5403         IdentTLoc, // ident_t *<loc>
5404         ThreadId,  // i32 <gtid>
5405         Lock       // kmp_critical_name *&<lock>
5406     };
5407     CommonActionTy Action(nullptr, std::nullopt,
5408                           OMPBuilder.getOrCreateRuntimeFunction(
5409                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5410                           EndArgs);
5411     AtomicRCG.setAction(Action);
5412     AtomicRCG(CGF);
5413   } else {
5414     AtomicRCG(CGF);
5415   }
5416 
5417   CGF.EmitBranch(DefaultBB);
5418   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5419 }
5420 
5421 /// Generates unique name for artificial threadprivate variables.
5422 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5423 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5424                                       const Expr *Ref) {
5425   SmallString<256> Buffer;
5426   llvm::raw_svector_ostream Out(Buffer);
5427   const clang::DeclRefExpr *DE;
5428   const VarDecl *D = ::getBaseDecl(Ref, DE);
5429   if (!D)
5430     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5431   D = D->getCanonicalDecl();
5432   std::string Name = CGM.getOpenMPRuntime().getName(
5433       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5434   Out << Prefix << Name << "_"
5435       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5436   return std::string(Out.str());
5437 }
5438 
5439 /// Emits reduction initializer function:
5440 /// \code
5441 /// void @.red_init(void* %arg, void* %orig) {
5442 /// %0 = bitcast void* %arg to <type>*
5443 /// store <type> <init>, <type>* %0
5444 /// ret void
5445 /// }
5446 /// \endcode
5447 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5448                                            SourceLocation Loc,
5449                                            ReductionCodeGen &RCG, unsigned N) {
5450   ASTContext &C = CGM.getContext();
5451   QualType VoidPtrTy = C.VoidPtrTy;
5452   VoidPtrTy.addRestrict();
5453   FunctionArgList Args;
5454   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5455                           ImplicitParamDecl::Other);
5456   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5457                               ImplicitParamDecl::Other);
5458   Args.emplace_back(&Param);
5459   Args.emplace_back(&ParamOrig);
5460   const auto &FnInfo =
5461       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5462   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5463   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5464   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5465                                     Name, &CGM.getModule());
5466   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5467   Fn->setDoesNotRecurse();
5468   CodeGenFunction CGF(CGM);
5469   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5470   QualType PrivateType = RCG.getPrivateType(N);
5471   Address PrivateAddr = CGF.EmitLoadOfPointer(
5472       CGF.Builder.CreateElementBitCast(
5473           CGF.GetAddrOfLocalVar(&Param),
5474           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5475       C.getPointerType(PrivateType)->castAs<PointerType>());
5476   llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5479   if (RCG.getSizes(N).second) {
5480     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5481         CGF, CGM.getContext().getSizeType(),
5482         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5483     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5484                                 CGM.getContext().getSizeType(), Loc);
5485   }
5486   RCG.emitAggregateType(CGF, N, Size);
5487   Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from the 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
5491   if (RCG.usesReductionInitializer(N)) {
5492     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5493     OrigAddr = CGF.EmitLoadOfPointer(
5494         SharedAddr,
5495         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5496   }
5497   // Emit the initializer:
5498   // %0 = bitcast void* %arg to <type>*
5499   // store <type> <init>, <type>* %0
5500   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5501                          [](CodeGenFunction &) { return false; });
5502   CGF.FinishFunction();
5503   return Fn;
5504 }
5505 
5506 /// Emits reduction combiner function:
5507 /// \code
5508 /// void @.red_comb(void* %arg0, void* %arg1) {
5509 /// %lhs = bitcast void* %arg0 to <type>*
5510 /// %rhs = bitcast void* %arg1 to <type>*
5511 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5512 /// store <type> %2, <type>* %lhs
5513 /// ret void
5514 /// }
5515 /// \endcode
5516 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5517                                            SourceLocation Loc,
5518                                            ReductionCodeGen &RCG, unsigned N,
5519                                            const Expr *ReductionOp,
5520                                            const Expr *LHS, const Expr *RHS,
5521                                            const Expr *PrivateRef) {
5522   ASTContext &C = CGM.getContext();
5523   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5524   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5525   FunctionArgList Args;
5526   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5527                                C.VoidPtrTy, ImplicitParamDecl::Other);
5528   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5529                             ImplicitParamDecl::Other);
5530   Args.emplace_back(&ParamInOut);
5531   Args.emplace_back(&ParamIn);
5532   const auto &FnInfo =
5533       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5534   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5535   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5536   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5537                                     Name, &CGM.getModule());
5538   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5539   Fn->setDoesNotRecurse();
5540   CodeGenFunction CGF(CGM);
5541   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5542   llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5545   if (RCG.getSizes(N).second) {
5546     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5547         CGF, CGM.getContext().getSizeType(),
5548         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5549     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5550                                 CGM.getContext().getSizeType(), Loc);
5551   }
5552   RCG.emitAggregateType(CGF, N, Size);
5553   // Remap lhs and rhs variables to the addresses of the function arguments.
5554   // %lhs = bitcast void* %arg0 to <type>*
5555   // %rhs = bitcast void* %arg1 to <type>*
5556   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5557   PrivateScope.addPrivate(
5558       LHSVD,
5559       // Pull out the pointer to the variable.
5560       CGF.EmitLoadOfPointer(
5561           CGF.Builder.CreateElementBitCast(
5562               CGF.GetAddrOfLocalVar(&ParamInOut),
5563               CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5564           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5565   PrivateScope.addPrivate(
5566       RHSVD,
5567       // Pull out the pointer to the variable.
5568       CGF.EmitLoadOfPointer(
5569           CGF.Builder.CreateElementBitCast(
5570             CGF.GetAddrOfLocalVar(&ParamIn),
5571             CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5572           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5573   PrivateScope.Privatize();
5574   // Emit the combiner body:
5575   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5576   // store <type> %2, <type>* %lhs
5577   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5578       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5579       cast<DeclRefExpr>(RHS));
5580   CGF.FinishFunction();
5581   return Fn;
5582 }
5583 
5584 /// Emits reduction finalizer function:
5585 /// \code
5586 /// void @.red_fini(void* %arg) {
5587 /// %0 = bitcast void* %arg to <type>*
5588 /// <destroy>(<type>* %0)
5589 /// ret void
5590 /// }
5591 /// \endcode
5592 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5593                                            SourceLocation Loc,
5594                                            ReductionCodeGen &RCG, unsigned N) {
5595   if (!RCG.needCleanups(N))
5596     return nullptr;
5597   ASTContext &C = CGM.getContext();
5598   FunctionArgList Args;
5599   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5600                           ImplicitParamDecl::Other);
5601   Args.emplace_back(&Param);
5602   const auto &FnInfo =
5603       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5604   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5605   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5606   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5607                                     Name, &CGM.getModule());
5608   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5609   Fn->setDoesNotRecurse();
5610   CodeGenFunction CGF(CGM);
5611   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5612   Address PrivateAddr = CGF.EmitLoadOfPointer(
5613       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5614   llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5617   if (RCG.getSizes(N).second) {
5618     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5619         CGF, CGM.getContext().getSizeType(),
5620         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5621     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5622                                 CGM.getContext().getSizeType(), Loc);
5623   }
5624   RCG.emitAggregateType(CGF, N, Size);
5625   // Emit the finalizer body:
5626   // <destroy>(<type>* %0)
5627   RCG.emitCleanups(CGF, N, PrivateAddr);
5628   CGF.FinishFunction(Loc);
5629   return Fn;
5630 }
5631 
5632 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5633     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5634     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5635   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5636     return nullptr;
5637 
5638   // Build typedef struct:
5639   // kmp_taskred_input {
5640   //   void *reduce_shar; // shared reduction item
5641   //   void *reduce_orig; // original reduction item used for initialization
5642   //   size_t reduce_size; // size of data item
5643   //   void *reduce_init; // data initialization routine
5644   //   void *reduce_fini; // data finalization routine
5645   //   void *reduce_comb; // data combiner routine
  //   kmp_taskred_flags_t flags; // flags for additional info from compiler
5647   // } kmp_taskred_input_t;
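  //
  // An array of Size such records is built below and passed as the 'data'
  // argument to __kmpc_taskred_init (or to __kmpc_taskred_modifier_init when
  // a task reduction modifier is present).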
5648   ASTContext &C = CGM.getContext();
5649   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5650   RD->startDefinition();
5651   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5652   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5653   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5655   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5656   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5657   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5658       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5659   RD->completeDefinition();
5660   QualType RDType = C.getRecordType(RD);
5661   unsigned Size = Data.ReductionVars.size();
5662   llvm::APInt ArraySize(/*numBits=*/64, Size);
5663   QualType ArrayRDType = C.getConstantArrayType(
5664       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
5666   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5667   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5668                        Data.ReductionCopies, Data.ReductionOps);
5669   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5671     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5672                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5673     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5674         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5675         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5676         ".rd_input.gep.");
5677     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5678     // ElemLVal.reduce_shar = &Shareds[Cnt];
5679     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5680     RCG.emitSharedOrigLValue(CGF, Cnt);
5681     llvm::Value *CastedShared =
5682         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5683     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5684     // ElemLVal.reduce_orig = &Origs[Cnt];
5685     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5686     llvm::Value *CastedOrig =
5687         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
5688     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
5689     RCG.emitAggregateType(CGF, Cnt);
5690     llvm::Value *SizeValInChars;
5691     llvm::Value *SizeVal;
5692     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and make them available to those functions.
5698     bool DelayedCreation = !!SizeVal;
5699     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5700                                                /*isSigned=*/false);
5701     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5702     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5703     // ElemLVal.reduce_init = init;
5704     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5705     llvm::Value *InitAddr =
5706         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5707     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5708     // ElemLVal.reduce_fini = fini;
5709     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5710     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5711     llvm::Value *FiniAddr = Fini
5712                                 ? CGF.EmitCastToVoidPtr(Fini)
5713                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5714     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5715     // ElemLVal.reduce_comb = comb;
5716     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5717     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5718         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5719         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5720     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5721     // ElemLVal.flags = 0;
5722     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5723     if (DelayedCreation) {
5724       CGF.EmitStoreOfScalar(
5725           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5726           FlagsLVal);
    } else {
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
    }
5730   }
5731   if (Data.IsReductionWithTaskMod) {
5732     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5733     // is_ws, int num, void *data);
5734     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5735     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5736                                                   CGM.IntTy, /*isSigned=*/true);
5737     llvm::Value *Args[] = {
5738         IdentTLoc, GTid,
5739         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5740                                /*isSigned=*/true),
5741         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5742         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5743             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5744     return CGF.EmitRuntimeCall(
5745         OMPBuilder.getOrCreateRuntimeFunction(
5746             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5747         Args);
5748   }
5749   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5750   llvm::Value *Args[] = {
5751       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5752                                 /*isSigned=*/true),
5753       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5754       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5755                                                       CGM.VoidPtrTy)};
5756   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5757                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
5758                              Args);
5759 }
5760 
5761 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5762                                             SourceLocation Loc,
5763                                             bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
5766   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5767   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5768                                                 CGM.IntTy, /*isSigned=*/true);
5769   llvm::Value *Args[] = {IdentTLoc, GTid,
5770                          llvm::ConstantInt::get(CGM.IntTy,
5771                                                 IsWorksharingReduction ? 1 : 0,
5772                                                 /*isSigned=*/true)};
5773   (void)CGF.EmitRuntimeCall(
5774       OMPBuilder.getOrCreateRuntimeFunction(
5775           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5776       Args);
5777 }
5778 
5779 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5780                                               SourceLocation Loc,
5781                                               ReductionCodeGen &RCG,
5782                                               unsigned N) {
5783   auto Sizes = RCG.getSizes(N);
  // Emit a threadprivate global variable if the size is non-constant
  // (Sizes.second != nullptr).
5786   if (Sizes.second) {
5787     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5788                                                      /*isSigned=*/false);
5789     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5790         CGF, CGM.getContext().getSizeType(),
5791         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5792     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5793   }
5794 }
5795 
5796 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5797                                               SourceLocation Loc,
5798                                               llvm::Value *ReductionsPtr,
5799                                               LValue SharedLVal) {
5800   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5801   // *d);
5802   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5803                                                    CGM.IntTy,
5804                                                    /*isSigned=*/true),
5805                          ReductionsPtr,
5806                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5807                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5808   return Address(
5809       CGF.EmitRuntimeCall(
5810           OMPBuilder.getOrCreateRuntimeFunction(
5811               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5812           Args),
5813       CGF.Int8Ty, SharedLVal.getAlignment());
5814 }
5815 
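// For a plain '#pragma omp taskwait' this reduces to a single runtime call,
// roughly:
//
//   __kmpc_omp_taskwait(&loc, gtid);
//
// With depend clauses (e.g. 'taskwait depend(in : x)') the dependence array
// is materialized first and __kmpc_omp_taskwait_deps_51 is called instead.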
5816 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5817                                        const OMPTaskDataTy &Data) {
5818   if (!CGF.HaveInsertPoint())
5819     return;
5820 
5821   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5822     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5823     OMPBuilder.createTaskwait(CGF.Builder);
5824   } else {
5825     llvm::Value *ThreadID = getThreadID(CGF, Loc);
5826     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5827     auto &M = CGM.getModule();
5828     Address DependenciesArray = Address::invalid();
5829     llvm::Value *NumOfElements;
5830     std::tie(NumOfElements, DependenciesArray) =
5831         emitDependClause(CGF, Data.Dependences, Loc);
5832     if (!Data.Dependences.empty()) {
5833       llvm::Value *DepWaitTaskArgs[7];
5834       DepWaitTaskArgs[0] = UpLoc;
5835       DepWaitTaskArgs[1] = ThreadID;
5836       DepWaitTaskArgs[2] = NumOfElements;
5837       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5838       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5839       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5840       DepWaitTaskArgs[6] =
5841           llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5842 
5843       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5844 
5845       // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5846       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5847       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5848       // kmp_int32 has_no_wait); if dependence info is specified.
5849       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5850                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
5851                           DepWaitTaskArgs);
5852 
5853     } else {
5854 
5855       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5856       // global_tid);
5857       llvm::Value *Args[] = {UpLoc, ThreadID};
5858       // Ignore return result until untied tasks are supported.
5859       CGF.EmitRuntimeCall(
5860           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5861           Args);
5862     }
5863   }
5864 
5865   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5866     Region->emitUntiedSwitch(CGF);
5867 }
5868 
5869 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5870                                            OpenMPDirectiveKind InnerKind,
5871                                            const RegionCodeGenTy &CodeGen,
5872                                            bool HasCancel) {
5873   if (!CGF.HaveInsertPoint())
5874     return;
5875   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5876                                  InnerKind != OMPD_critical &&
5877                                      InnerKind != OMPD_master &&
5878                                      InnerKind != OMPD_masked);
5879   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5880 }
5881 
5882 namespace {
5883 enum RTCancelKind {
5884   CancelNoreq = 0,
5885   CancelParallel = 1,
5886   CancelLoop = 2,
5887   CancelSections = 3,
5888   CancelTaskgroup = 4
5889 };
5890 } // anonymous namespace
5891 
5892 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5893   RTCancelKind CancelKind = CancelNoreq;
5894   if (CancelRegion == OMPD_parallel)
5895     CancelKind = CancelParallel;
5896   else if (CancelRegion == OMPD_for)
5897     CancelKind = CancelLoop;
5898   else if (CancelRegion == OMPD_sections)
5899     CancelKind = CancelSections;
5900   else {
5901     assert(CancelRegion == OMPD_taskgroup);
5902     CancelKind = CancelTaskgroup;
5903   }
5904   return CancelKind;
5905 }
5906 
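// For example, '#pragma omp cancellation point sections' lowers to roughly:
//
//   if (__kmpc_cancellationpoint(&loc, gtid, /*cncl_kind=*/CancelSections))
//     goto cancel.exit; // branch through cleanups to the cancel destination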
5907 void CGOpenMPRuntime::emitCancellationPointCall(
5908     CodeGenFunction &CGF, SourceLocation Loc,
5909     OpenMPDirectiveKind CancelRegion) {
5910   if (!CGF.HaveInsertPoint())
5911     return;
5912   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5913   // global_tid, kmp_int32 cncl_kind);
5914   if (auto *OMPRegionInfo =
5915           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5916     // For 'cancellation point taskgroup', the task region info may not have a
5917     // cancel. This may instead happen in another adjacent task.
5918     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5919       llvm::Value *Args[] = {
5920           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5921           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5922       // Ignore return result until untied tasks are supported.
5923       llvm::Value *Result = CGF.EmitRuntimeCall(
5924           OMPBuilder.getOrCreateRuntimeFunction(
5925               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5926           Args);
5927       // if (__kmpc_cancellationpoint()) {
5928       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5929       //   exit from construct;
5930       // }
5931       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5932       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5933       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5934       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5935       CGF.EmitBlock(ExitBB);
5936       if (CancelRegion == OMPD_parallel)
5937         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5938       // exit from construct;
5939       CodeGenFunction::JumpDest CancelDest =
5940           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5941       CGF.EmitBranchThroughCleanup(CancelDest);
5942       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5943     }
5944   }
5945 }
5946 
5947 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5948                                      const Expr *IfCond,
5949                                      OpenMPDirectiveKind CancelRegion) {
5950   if (!CGF.HaveInsertPoint())
5951     return;
5952   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5953   // kmp_int32 cncl_kind);
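  // With an if clause (e.g. '#pragma omp cancel parallel if(c)'), the runtime
  // call below is emitted under the condition via emitIfClause; otherwise it
  // is emitted unconditionally.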
5954   auto &M = CGM.getModule();
5955   if (auto *OMPRegionInfo =
5956           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5957     auto &&ThenGen = [this, &M, Loc, CancelRegion,
5958                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5959       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5960       llvm::Value *Args[] = {
5961           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5962           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5963       // Ignore return result until untied tasks are supported.
5964       llvm::Value *Result = CGF.EmitRuntimeCall(
5965           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5966       // if (__kmpc_cancel()) {
5967       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5968       //   exit from construct;
5969       // }
5970       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5971       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5972       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5973       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5974       CGF.EmitBlock(ExitBB);
5975       if (CancelRegion == OMPD_parallel)
5976         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5977       // exit from construct;
5978       CodeGenFunction::JumpDest CancelDest =
5979           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5980       CGF.EmitBranchThroughCleanup(CancelDest);
5981       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5982     };
5983     if (IfCond) {
5984       emitIfClause(CGF, IfCond, ThenGen,
5985                    [](CodeGenFunction &, PrePostActionTy &) {});
5986     } else {
5987       RegionCodeGenTy ThenRCG(ThenGen);
5988       ThenRCG(CGF);
5989     }
5990   }
5991 }
5992 
5993 namespace {
5994 /// Cleanup action for uses_allocators support.
5995 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5996   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5997 
5998 public:
5999   OMPUsesAllocatorsActionTy(
6000       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6001       : Allocators(Allocators) {}
6002   void Enter(CodeGenFunction &CGF) override {
6003     if (!CGF.HaveInsertPoint())
6004       return;
6005     for (const auto &AllocatorData : Allocators) {
6006       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6007           CGF, AllocatorData.first, AllocatorData.second);
6008     }
6009   }
6010   void Exit(CodeGenFunction &CGF) override {
6011     if (!CGF.HaveInsertPoint())
6012       return;
6013     for (const auto &AllocatorData : Allocators) {
6014       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6015                                                         AllocatorData.first);
6016     }
6017   }
6018 };
6019 } // namespace
6020 
6021 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6022     const OMPExecutableDirective &D, StringRef ParentName,
6023     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6024     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6025   assert(!ParentName.empty() && "Invalid target entry parent name!");
6026   HasEmittedTargetRegion = true;
6027   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6028   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6029     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // Use a distinct name to avoid shadowing the directive parameter D.
      const OMPUsesAllocatorsClause::Data AD = C->getAllocatorData(I);
      if (!AD.AllocatorTraits)
        continue;
      Allocators.emplace_back(AD.Allocator, AD.AllocatorTraits);
6034     }
6035   }
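  // Allocators without traits (e.g. predefined allocators such as
  // omp_default_mem_alloc) were skipped above; the remaining pairs are
  // initialized on entry to the target region and destroyed on exit by
  // OMPUsesAllocatorsActionTy.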
6036   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6037   CodeGen.setAction(UsesAllocatorAction);
6038   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6039                                    IsOffloadEntry, CodeGen);
6040 }
6041 
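// Initializes an allocator from a uses_allocators clause: the traits array is
// loaded as a void*, passed to __kmpc_init_allocator together with the thread
// id and a null (default) memory space handle, and the returned handle is
// stored into the allocator variable.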
6042 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6043                                              const Expr *Allocator,
6044                                              const Expr *AllocatorTraits) {
6045   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6046   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6047   // Use default memspace handle.
6048   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6049   llvm::Value *NumTraits = llvm::ConstantInt::get(
6050       CGF.IntTy, cast<ConstantArrayType>(
6051                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6052                      ->getSize()
6053                      .getLimitedValue());
6054   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6055   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6056       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6057   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6058                                            AllocatorTraitsLVal.getBaseInfo(),
6059                                            AllocatorTraitsLVal.getTBAAInfo());
6060   llvm::Value *Traits =
6061       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6062 
6063   llvm::Value *AllocatorVal =
6064       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6065                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6066                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6067   // Store to allocator.
6068   CGF.EmitVarDecl(*cast<VarDecl>(
6069       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6070   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6071   AllocatorVal =
6072       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6073                                Allocator->getType(), Allocator->getExprLoc());
6074   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6075 }
6076 
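// Destroys an allocator initialized above: loads the allocator handle,
// converts it back to void*, and passes it with the thread id to
// __kmpc_destroy_allocator.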
6077 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6078                                              const Expr *Allocator) {
6079   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6080   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6081   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6082   llvm::Value *AllocatorVal =
6083       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6084   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6085                                           CGF.getContext().VoidPtrTy,
6086                                           Allocator->getExprLoc());
6087   (void)CGF.EmitRuntimeCall(
6088       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6089                                             OMPRTL___kmpc_destroy_allocator),
6090       {ThreadId, AllocatorVal});
6091 }
6092 
6093 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6094     const OMPExecutableDirective &D, StringRef ParentName,
6095     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6096     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6098   auto EntryInfo =
6099       getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName);
6100 
6101   CodeGenFunction CGF(CGM, true);
6102   llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6103       [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6104         const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6105 
6106         CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6107         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6108         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6109       };
6110 
  // Get default NumTeams and ThreadLimit values from the directive's clauses.
6112   int32_t DefaultValTeams = -1;
6113   int32_t DefaultValThreads = -1;
6114   getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6115   getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6116 
6117   OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo,
6118                                       GenerateOutlinedFunction, DefaultValTeams,
6119                                       DefaultValThreads, IsOffloadEntry,
6120                                       OutlinedFn, OutlinedFnID);
6121 
6122   if (OutlinedFn != nullptr)
6123     CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6124 }
6125 
6126 /// Checks if the expression is constant or does not have non-trivial function
6127 /// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6129   // We can skip constant expressions.
6130   // We can skip expressions with trivial calls or simple expressions.
6131   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6132           !E->hasNonTrivialCall(Ctx)) &&
6133          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6134 }
6135 
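// Strips compound statements, trivial expressions, and ignorable statements
// and declarations, returning the single remaining child statement, or null
// if more than one meaningful child is found.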
6136 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6137                                                     const Stmt *Body) {
6138   const Stmt *Child = Body->IgnoreContainers();
6139   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6140     Child = nullptr;
6141     for (const Stmt *S : C->body()) {
6142       if (const auto *E = dyn_cast<Expr>(S)) {
6143         if (isTrivial(Ctx, E))
6144           continue;
6145       }
6146       // Some of the statements can be ignored.
6147       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6148           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6149         continue;
6150       // Analyze declarations.
6151       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6152         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6153               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6154                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6155                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6156                   isa<UsingDirectiveDecl>(D) ||
6157                   isa<OMPDeclareReductionDecl>(D) ||
6158                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6159                 return true;
6160               const auto *VD = dyn_cast<VarDecl>(D);
6161               if (!VD)
6162                 return false;
6163               return VD->hasGlobalStorage() || !VD->isUsed();
6164             }))
6165           continue;
6166       }
6167       // Found multiple children - cannot get the one child only.
6168       if (Child)
6169         return nullptr;
6170       Child = S;
6171     }
6172     if (Child)
6173       Child = Child->IgnoreContainers();
6174   }
6175   return Child;
6176 }
6177 
6178 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6179     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6180     int32_t &DefaultVal) {
6181 
6182   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6183   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6184          "Expected target-based executable directive.");
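  // Illustrative examples: a bare '#pragma omp target' yields DefaultVal = -1
  // (no teams region); '#pragma omp target' enclosing
  // '#pragma omp teams num_teams(8)' returns the num_teams expression and, if
  // it is a constant, sets DefaultVal = 8.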
6185   switch (DirectiveKind) {
6186   case OMPD_target: {
6187     const auto *CS = D.getInnermostCapturedStmt();
6188     const auto *Body =
6189         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6190     const Stmt *ChildStmt =
6191         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6192     if (const auto *NestedDir =
6193             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6194       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6195         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6196           const Expr *NumTeams =
6197               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6198           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6199             if (auto Constant =
6200                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6201               DefaultVal = Constant->getExtValue();
6202           return NumTeams;
6203         }
6204         DefaultVal = 0;
6205         return nullptr;
6206       }
      // Any other nested construct (including parallel and simd) implies a
      // single implicit team.
      DefaultVal = 1;
      return nullptr;
6214     }
    // A value of -1 signals that no teams region needs to be emitted.
6216     DefaultVal = -1;
6217     return nullptr;
6218   }
6219   case OMPD_target_teams:
6220   case OMPD_target_teams_distribute:
6221   case OMPD_target_teams_distribute_simd:
6222   case OMPD_target_teams_distribute_parallel_for:
6223   case OMPD_target_teams_distribute_parallel_for_simd: {
6224     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6225       const Expr *NumTeams =
6226           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6227       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6228         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6229           DefaultVal = Constant->getExtValue();
6230       return NumTeams;
6231     }
6232     DefaultVal = 0;
6233     return nullptr;
6234   }
6235   case OMPD_target_parallel:
6236   case OMPD_target_parallel_for:
6237   case OMPD_target_parallel_for_simd:
6238   case OMPD_target_simd:
6239     DefaultVal = 1;
6240     return nullptr;
6241   case OMPD_parallel:
6242   case OMPD_for:
6243   case OMPD_parallel_for:
6244   case OMPD_parallel_master:
6245   case OMPD_parallel_sections:
6246   case OMPD_for_simd:
6247   case OMPD_parallel_for_simd:
6248   case OMPD_cancel:
6249   case OMPD_cancellation_point:
6250   case OMPD_ordered:
6251   case OMPD_threadprivate:
6252   case OMPD_allocate:
6253   case OMPD_task:
6254   case OMPD_simd:
6255   case OMPD_tile:
6256   case OMPD_unroll:
6257   case OMPD_sections:
6258   case OMPD_section:
6259   case OMPD_single:
6260   case OMPD_master:
6261   case OMPD_critical:
6262   case OMPD_taskyield:
6263   case OMPD_barrier:
6264   case OMPD_taskwait:
6265   case OMPD_taskgroup:
6266   case OMPD_atomic:
6267   case OMPD_flush:
6268   case OMPD_depobj:
6269   case OMPD_scan:
6270   case OMPD_teams:
6271   case OMPD_target_data:
6272   case OMPD_target_exit_data:
6273   case OMPD_target_enter_data:
6274   case OMPD_distribute:
6275   case OMPD_distribute_simd:
6276   case OMPD_distribute_parallel_for:
6277   case OMPD_distribute_parallel_for_simd:
6278   case OMPD_teams_distribute:
6279   case OMPD_teams_distribute_simd:
6280   case OMPD_teams_distribute_parallel_for:
6281   case OMPD_teams_distribute_parallel_for_simd:
6282   case OMPD_target_update:
6283   case OMPD_declare_simd:
6284   case OMPD_declare_variant:
6285   case OMPD_begin_declare_variant:
6286   case OMPD_end_declare_variant:
6287   case OMPD_declare_target:
6288   case OMPD_end_declare_target:
6289   case OMPD_declare_reduction:
6290   case OMPD_declare_mapper:
6291   case OMPD_taskloop:
6292   case OMPD_taskloop_simd:
6293   case OMPD_master_taskloop:
6294   case OMPD_master_taskloop_simd:
6295   case OMPD_parallel_master_taskloop:
6296   case OMPD_parallel_master_taskloop_simd:
6297   case OMPD_requires:
6298   case OMPD_metadirective:
6299   case OMPD_unknown:
6300     break;
6301   default:
6302     break;
6303   }
6304   llvm_unreachable("Unexpected directive kind.");
6305 }
6306 
6307 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6308     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6309   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6310          "Clauses associated with the teams directive expected to be emitted "
6311          "only for the host!");
6312   CGBuilderTy &Bld = CGF.Builder;
6313   int32_t DefaultNT = -1;
6314   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6315   if (NumTeams != nullptr) {
6316     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6317 
6318     switch (DirectiveKind) {
6319     case OMPD_target: {
6320       const auto *CS = D.getInnermostCapturedStmt();
6321       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6322       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign=*/true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, /*isSigned=*/true);
6327     }
6328     case OMPD_target_teams:
6329     case OMPD_target_teams_distribute:
6330     case OMPD_target_teams_distribute_simd:
6331     case OMPD_target_teams_distribute_parallel_for:
6332     case OMPD_target_teams_distribute_parallel_for_simd: {
6333       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign=*/true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, /*isSigned=*/true);
6338     }
6339     default:
6340       break;
6341     }
6342   }
6343 
6344   return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
6345 }
6346 
6347 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6348                                   llvm::Value *DefaultThreadLimitVal) {
6349   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6350       CGF.getContext(), CS->getCapturedStmt());
6351   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6352     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6353       llvm::Value *NumThreads = nullptr;
6354       llvm::Value *CondVal = nullptr;
      // Handle the if clause. If an if clause is present, the number of
      // threads is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
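      // E.g. for '#pragma omp parallel if(c) num_threads(n)' nested in the
      // captured statement, this computes c ? min(n, default_thread_limit) : 1
      // (the min is the select emitted below when a default thread limit is
      // available).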
6357       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6358         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6359         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6360         const OMPIfClause *IfClause = nullptr;
6361         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6362           if (C->getNameModifier() == OMPD_unknown ||
6363               C->getNameModifier() == OMPD_parallel) {
6364             IfClause = C;
6365             break;
6366           }
6367         }
6368         if (IfClause) {
6369           const Expr *Cond = IfClause->getCondition();
6370           bool Result;
6371           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6372             if (!Result)
6373               return CGF.Builder.getInt32(1);
6374           } else {
6375             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6376             if (const auto *PreInit =
6377                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6378               for (const auto *I : PreInit->decls()) {
6379                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6380                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6381                 } else {
6382                   CodeGenFunction::AutoVarEmission Emission =
6383                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6384                   CGF.EmitAutoVarCleanups(Emission);
6385                 }
6386               }
6387             }
6388             CondVal = CGF.EvaluateExprAsBool(Cond);
6389           }
6390         }
6391       }
      // Check the value of the num_threads clause only if the if clause was
      // not specified or does not evaluate to false.
6394       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6395         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6396         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6397         const auto *NumThreadsClause =
6398             Dir->getSingleClause<OMPNumThreadsClause>();
6399         CodeGenFunction::LexicalScope Scope(
6400             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6401         if (const auto *PreInit =
6402                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6403           for (const auto *I : PreInit->decls()) {
6404             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6405               CGF.EmitVarDecl(cast<VarDecl>(*I));
6406             } else {
6407               CodeGenFunction::AutoVarEmission Emission =
6408                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6409               CGF.EmitAutoVarCleanups(Emission);
6410             }
6411           }
6412         }
6413         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6414         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6415                                                /*isSigned=*/false);
6416         if (DefaultThreadLimitVal)
6417           NumThreads = CGF.Builder.CreateSelect(
6418               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6419               DefaultThreadLimitVal, NumThreads);
6420       } else {
6421         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6422                                            : CGF.Builder.getInt32(0);
6423       }
6424       // Process condition of the if clause.
6425       if (CondVal) {
6426         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6427                                               CGF.Builder.getInt32(1));
6428       }
6429       return NumThreads;
6430     }
6431     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6432       return CGF.Builder.getInt32(1);
6433   }
6434   return DefaultThreadLimitVal;
6435 }
6436 
6437 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6438     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6439     int32_t &DefaultVal) {
6440   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6441   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6442          "Expected target-based executable directive.");
6443 
6444   switch (DirectiveKind) {
6445   case OMPD_target:
    // A bare 'target' has no thread_limit clause.
6447     return nullptr;
6448   case OMPD_target_teams:
6449   case OMPD_target_teams_distribute:
6450     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6451       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6452       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6453       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6454         if (auto Constant =
6455                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6456           DefaultVal = Constant->getExtValue();
6457       return ThreadLimit;
6458     }
6459     return nullptr;
6460   case OMPD_target_parallel:
6461   case OMPD_target_parallel_for:
6462   case OMPD_target_parallel_for_simd:
6463   case OMPD_target_teams_distribute_parallel_for:
6464   case OMPD_target_teams_distribute_parallel_for_simd: {
6465     Expr *ThreadLimit = nullptr;
6466     Expr *NumThreads = nullptr;
6467     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6468       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6469       ThreadLimit = ThreadLimitClause->getThreadLimit();
6470       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6471         if (auto Constant =
6472                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6473           DefaultVal = Constant->getExtValue();
6474     }
6475     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6476       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6477       NumThreads = NumThreadsClause->getNumThreads();
6478       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6479         if (auto Constant =
6480                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6481           if (Constant->getExtValue() < DefaultVal) {
6482             DefaultVal = Constant->getExtValue();
6483             ThreadLimit = NumThreads;
6484           }
6485         }
6486       }
6487     }
6488     return ThreadLimit;
6489   }
6490   case OMPD_target_teams_distribute_simd:
6491   case OMPD_target_simd:
6492     DefaultVal = 1;
6493     return nullptr;
6494   case OMPD_parallel:
6495   case OMPD_for:
6496   case OMPD_parallel_for:
6497   case OMPD_parallel_master:
6498   case OMPD_parallel_sections:
6499   case OMPD_for_simd:
6500   case OMPD_parallel_for_simd:
6501   case OMPD_cancel:
6502   case OMPD_cancellation_point:
6503   case OMPD_ordered:
6504   case OMPD_threadprivate:
6505   case OMPD_allocate:
6506   case OMPD_task:
6507   case OMPD_simd:
6508   case OMPD_tile:
6509   case OMPD_unroll:
6510   case OMPD_sections:
6511   case OMPD_section:
6512   case OMPD_single:
6513   case OMPD_master:
6514   case OMPD_critical:
6515   case OMPD_taskyield:
6516   case OMPD_barrier:
6517   case OMPD_taskwait:
6518   case OMPD_taskgroup:
6519   case OMPD_atomic:
6520   case OMPD_flush:
6521   case OMPD_depobj:
6522   case OMPD_scan:
6523   case OMPD_teams:
6524   case OMPD_target_data:
6525   case OMPD_target_exit_data:
6526   case OMPD_target_enter_data:
6527   case OMPD_distribute:
6528   case OMPD_distribute_simd:
6529   case OMPD_distribute_parallel_for:
6530   case OMPD_distribute_parallel_for_simd:
6531   case OMPD_teams_distribute:
6532   case OMPD_teams_distribute_simd:
6533   case OMPD_teams_distribute_parallel_for:
6534   case OMPD_teams_distribute_parallel_for_simd:
6535   case OMPD_target_update:
6536   case OMPD_declare_simd:
6537   case OMPD_declare_variant:
6538   case OMPD_begin_declare_variant:
6539   case OMPD_end_declare_variant:
6540   case OMPD_declare_target:
6541   case OMPD_end_declare_target:
6542   case OMPD_declare_reduction:
6543   case OMPD_declare_mapper:
6544   case OMPD_taskloop:
6545   case OMPD_taskloop_simd:
6546   case OMPD_master_taskloop:
6547   case OMPD_master_taskloop_simd:
6548   case OMPD_parallel_master_taskloop:
6549   case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
6552     break;
6553   default:
6554     break;
6555   }
6556   llvm_unreachable("Unsupported directive kind.");
6557 }
6558 
6559 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6560     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6561   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6562          "Clauses associated with the teams directive expected to be emitted "
6563          "only for the host!");
6564   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6565   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6566          "Expected target-based executable directive.");
6567   CGBuilderTy &Bld = CGF.Builder;
6568   llvm::Value *ThreadLimitVal = nullptr;
6569   llvm::Value *NumThreadsVal = nullptr;
6570   switch (DirectiveKind) {
6571   case OMPD_target: {
6572     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6573     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6574       return NumThreads;
6575     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6576         CGF.getContext(), CS->getCapturedStmt());
6577     // TODO: The standard is not clear how to resolve two thread limit clauses,
6578     //       let's pick the teams one if it's present, otherwise the target one.
6579     const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6580     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6581       if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6582         ThreadLimitClause = TLC;
6583         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6584         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6585         CodeGenFunction::LexicalScope Scope(
6586             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6587         if (const auto *PreInit =
6588                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6589           for (const auto *I : PreInit->decls()) {
6590             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6591               CGF.EmitVarDecl(cast<VarDecl>(*I));
6592             } else {
6593               CodeGenFunction::AutoVarEmission Emission =
6594                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6595               CGF.EmitAutoVarCleanups(Emission);
6596             }
6597           }
6598         }
6599       }
6600     }
6601     if (ThreadLimitClause) {
6602       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6603           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6604       ThreadLimitVal =
6605           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6606     }
6607     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6608       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6609           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6610         CS = Dir->getInnermostCapturedStmt();
6611         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6612             CGF.getContext(), CS->getCapturedStmt());
6613         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6614       }
6615       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6616           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6617         CS = Dir->getInnermostCapturedStmt();
6618         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6619           return NumThreads;
6620       }
6621       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6622         return Bld.getInt32(1);
6623     }
6624     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6625   }
6626   case OMPD_target_teams: {
6627     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6628       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6629       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6630       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6631           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6632       ThreadLimitVal =
6633           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6634     }
6635     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6636     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6637       return NumThreads;
6638     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6639         CGF.getContext(), CS->getCapturedStmt());
6640     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6641       if (Dir->getDirectiveKind() == OMPD_distribute) {
6642         CS = Dir->getInnermostCapturedStmt();
6643         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6644           return NumThreads;
6645       }
6646     }
6647     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6648   }
6649   case OMPD_target_teams_distribute:
6650     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6651       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6652       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6653       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6654           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6655       ThreadLimitVal =
6656           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6657     }
6658     if (llvm::Value *NumThreads =
6659             getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
6660       return NumThreads;
6661     return Bld.getInt32(0);
6662   case OMPD_target_parallel:
6663   case OMPD_target_parallel_for:
6664   case OMPD_target_parallel_for_simd:
6665   case OMPD_target_teams_distribute_parallel_for:
6666   case OMPD_target_teams_distribute_parallel_for_simd: {
6667     llvm::Value *CondVal = nullptr;
    // Handle the if clause. If an if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6670     if (D.hasClausesOfKind<OMPIfClause>()) {
6671       const OMPIfClause *IfClause = nullptr;
6672       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6673         if (C->getNameModifier() == OMPD_unknown ||
6674             C->getNameModifier() == OMPD_parallel) {
6675           IfClause = C;
6676           break;
6677         }
6678       }
6679       if (IfClause) {
6680         const Expr *Cond = IfClause->getCondition();
6681         bool Result;
6682         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6683           if (!Result)
6684             return Bld.getInt32(1);
6685         } else {
6686           CodeGenFunction::RunCleanupsScope Scope(CGF);
6687           CondVal = CGF.EvaluateExprAsBool(Cond);
6688         }
6689       }
6690     }
6691     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6692       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6693       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6694       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6695           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6696       ThreadLimitVal =
6697           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6698     }
6699     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6700       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6701       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6702       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6703           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6704       NumThreadsVal =
6705           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6706       ThreadLimitVal = ThreadLimitVal
6707                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6708                                                                 ThreadLimitVal),
6709                                               NumThreadsVal, ThreadLimitVal)
6710                            : NumThreadsVal;
6711     }
6712     if (!ThreadLimitVal)
6713       ThreadLimitVal = Bld.getInt32(0);
6714     if (CondVal)
6715       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6716     return ThreadLimitVal;
6717   }
6718   case OMPD_target_teams_distribute_simd:
6719   case OMPD_target_simd:
6720     return Bld.getInt32(1);
6721   case OMPD_parallel:
6722   case OMPD_for:
6723   case OMPD_parallel_for:
6724   case OMPD_parallel_master:
6725   case OMPD_parallel_sections:
6726   case OMPD_for_simd:
6727   case OMPD_parallel_for_simd:
6728   case OMPD_cancel:
6729   case OMPD_cancellation_point:
6730   case OMPD_ordered:
6731   case OMPD_threadprivate:
6732   case OMPD_allocate:
6733   case OMPD_task:
6734   case OMPD_simd:
6735   case OMPD_tile:
6736   case OMPD_unroll:
6737   case OMPD_sections:
6738   case OMPD_section:
6739   case OMPD_single:
6740   case OMPD_master:
6741   case OMPD_critical:
6742   case OMPD_taskyield:
6743   case OMPD_barrier:
6744   case OMPD_taskwait:
6745   case OMPD_taskgroup:
6746   case OMPD_atomic:
6747   case OMPD_flush:
6748   case OMPD_depobj:
6749   case OMPD_scan:
6750   case OMPD_teams:
6751   case OMPD_target_data:
6752   case OMPD_target_exit_data:
6753   case OMPD_target_enter_data:
6754   case OMPD_distribute:
6755   case OMPD_distribute_simd:
6756   case OMPD_distribute_parallel_for:
6757   case OMPD_distribute_parallel_for_simd:
6758   case OMPD_teams_distribute:
6759   case OMPD_teams_distribute_simd:
6760   case OMPD_teams_distribute_parallel_for:
6761   case OMPD_teams_distribute_parallel_for_simd:
6762   case OMPD_target_update:
6763   case OMPD_declare_simd:
6764   case OMPD_declare_variant:
6765   case OMPD_begin_declare_variant:
6766   case OMPD_end_declare_variant:
6767   case OMPD_declare_target:
6768   case OMPD_end_declare_target:
6769   case OMPD_declare_reduction:
6770   case OMPD_declare_mapper:
6771   case OMPD_taskloop:
6772   case OMPD_taskloop_simd:
6773   case OMPD_master_taskloop:
6774   case OMPD_master_taskloop_simd:
6775   case OMPD_parallel_master_taskloop:
6776   case OMPD_parallel_master_taskloop_simd:
6777   case OMPD_requires:
6778   case OMPD_metadirective:
6779   case OMPD_unknown:
6780     break;
6781   default:
6782     break;
6783   }
6784   llvm_unreachable("Unsupported directive kind.");
6785 }
6786 
6787 namespace {
6788 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6789 
6790 // Utility to handle information from clauses associated with a given
6791 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6792 // It provides a convenient interface to obtain the information and generate
6793 // code for that information.
6794 class MappableExprsHandler {
6795 public:
6796   /// Get the offset of the OMP_MAP_MEMBER_OF field.
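  /// E.g. with OMP_MAP_MEMBER_OF == 0xFFFF000000000000 this returns 48, the
  /// number of trailing zero bits counted by the loop below.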
6797   static unsigned getFlagMemberOffset() {
6798     unsigned Offset = 0;
6799     for (uint64_t Remain =
6800              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6801                  OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6802          !(Remain & 1); Remain = Remain >> 1)
6803       Offset++;
6804     return Offset;
6805   }
6806 
6807   /// Class that holds debugging information for a data mapping to be passed to
6808   /// the runtime library.
6809   class MappingExprInfo {
6810     /// The variable declaration used for the data mapping.
6811     const ValueDecl *MapDecl = nullptr;
6812     /// The original expression used in the map clause, or null if there is
6813     /// none.
6814     const Expr *MapExpr = nullptr;
6815 
6816   public:
6817     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6818         : MapDecl(MapDecl), MapExpr(MapExpr) {}
6819 
6820     const ValueDecl *getMapDecl() const { return MapDecl; }
6821     const Expr *getMapExpr() const { return MapExpr; }
6822   };
6823 
6824   /// Class that associates information with a base pointer to be passed to the
6825   /// runtime library.
6826   class BasePointerInfo {
6827     /// The base pointer.
6828     llvm::Value *Ptr = nullptr;
6829     /// The base declaration that refers to this device pointer, or null if
6830     /// there is none.
6831     const ValueDecl *DevPtrDecl = nullptr;
6832 
6833   public:
6834     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6835         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6836     llvm::Value *operator*() const { return Ptr; }
6837     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6838     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6839   };
6840 
6841   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6842   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
6843   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
6844   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6845   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
6846   using MapDimArrayTy = SmallVector<uint64_t, 4>;
6847   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
6848 
6849   /// This structure contains combined information generated for mappable
6850   /// clauses, including base pointers, pointers, sizes, map types, user-defined
6851   /// mappers, and non-contiguous information.
6852   struct MapCombinedInfoTy {
6853     struct StructNonContiguousInfo {
6854       bool IsNonContiguous = false;
6855       MapDimArrayTy Dims;
6856       MapNonContiguousArrayTy Offsets;
6857       MapNonContiguousArrayTy Counts;
6858       MapNonContiguousArrayTy Strides;
6859     };
6860     MapExprsArrayTy Exprs;
6861     MapBaseValuesArrayTy BasePointers;
6862     MapValuesArrayTy Pointers;
6863     MapValuesArrayTy Sizes;
6864     MapFlagsArrayTy Types;
6865     MapMappersArrayTy Mappers;
6866     StructNonContiguousInfo NonContigInfo;
6867 
6868     /// Append arrays in \a CurInfo.
6869     void append(MapCombinedInfoTy &CurInfo) {
6870       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6871       BasePointers.append(CurInfo.BasePointers.begin(),
6872                           CurInfo.BasePointers.end());
6873       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
6874       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
6875       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
6876       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
6885     }
6886   };
6887 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
6890   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6891   ///                    HE(FieldIndex, Pointer)}
6892   struct StructRangeInfoTy {
6893     MapCombinedInfoTy PreliminaryMapData;
6894     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6895         0, Address::invalid()};
6896     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6897         0, Address::invalid()};
6898     Address Base = Address::invalid();
6899     Address LB = Address::invalid();
6900     bool IsArraySection = false;
6901     bool HasCompleteRecord = false;
6902   };
6903 
6904 private:
  /// Information about a mappable expression component list together with its
  /// map type, modifiers, and related properties.
6906   struct MapInfo {
6907     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6908     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6909     ArrayRef<OpenMPMapModifierKind> MapModifiers;
6910     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6911     bool ReturnDevicePointer = false;
6912     bool IsImplicit = false;
6913     const ValueDecl *Mapper = nullptr;
6914     const Expr *VarRef = nullptr;
6915     bool ForDeviceAddr = false;
6916 
6917     MapInfo() = default;
6918     MapInfo(
6919         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6920         OpenMPMapClauseKind MapType,
6921         ArrayRef<OpenMPMapModifierKind> MapModifiers,
6922         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6923         bool ReturnDevicePointer, bool IsImplicit,
6924         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6925         bool ForDeviceAddr = false)
6926         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6927           MotionModifiers(MotionModifiers),
6928           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6929           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6930   };
6931 
6932   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6933   /// member and there is no map information about it, then emission of that
6934   /// entry is deferred until the whole struct has been processed.
6935   struct DeferredDevicePtrEntryTy {
6936     const Expr *IE = nullptr;
6937     const ValueDecl *VD = nullptr;
6938     bool ForDeviceAddr = false;
6939 
6940     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6941                              bool ForDeviceAddr)
6942         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6943   };
6944 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
6947   llvm::PointerUnion<const OMPExecutableDirective *,
6948                      const OMPDeclareMapperDecl *>
6949       CurDir;
6950 
6951   /// Function the directive is being generated for.
6952   CodeGenFunction &CGF;
6953 
6954   /// Set of all first private variables in the current directive.
6955   /// bool data is set to true if the variable is implicitly marked as
6956   /// firstprivate, false otherwise.
6957   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6958 
6959   /// Map between device pointer declarations and their expression components.
6960   /// The key value for declarations in 'this' is null.
6961   llvm::DenseMap<
6962       const ValueDecl *,
6963       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6964       DevPointersMap;
6965 
6966   /// Map between device addr declarations and their expression components.
6967   /// The key value for declarations in 'this' is null.
6968   llvm::DenseMap<
6969       const ValueDecl *,
6970       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6971       HasDevAddrsMap;
6972 
6973   /// Map between lambda declarations and their map type.
6974   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6975 
6976   llvm::Value *getExprTypeSize(const Expr *E) const {
6977     QualType ExprTy = E->getType().getCanonicalType();
6978 
6979     // Calculate the size for array shaping expression.
6980     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6981       llvm::Value *Size =
6982           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6983       for (const Expr *SE : OAE->getDimensions()) {
6984         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6985         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6986                                       CGF.getContext().getSizeType(),
6987                                       SE->getExprLoc());
6988         Size = CGF.Builder.CreateNUWMul(Size, Sz);
6989       }
6990       return Size;
6991     }
6992 
6993     // Reference types are ignored for mapping purposes.
6994     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6995       ExprTy = RefTy->getPointeeType().getCanonicalType();
6996 
6997     // Given that an array section is considered a built-in type, we need to
6998     // do the calculation based on the length of the section instead of relying
6999     // on CGF.getTypeSize(E->getType()).
7000     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7001       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7002                             OAE->getBase()->IgnoreParenImpCasts())
7003                             .getCanonicalType();
7004 
      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
7008       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7009           !OAE->getLowerBound())
7010         return CGF.getTypeSize(BaseTy);
7011 
7012       llvm::Value *ElemSize;
7013       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7014         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7015       } else {
7016         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7017         assert(ATy && "Expecting array type if not a pointer type.");
7018         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7019       }
7020 
7021       // If we don't have a length at this point, that is because we have an
7022       // array section with a single element.
7023       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7024         return ElemSize;
7025 
7026       if (const Expr *LenExpr = OAE->getLength()) {
7027         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7028         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7029                                              CGF.getContext().getSizeType(),
7030                                              LenExpr->getExprLoc());
7031         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7032       }
7033       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7034              OAE->getLowerBound() && "expected array_section[lb:].");
7035       // Size = sizetype - lb * elemtype;
7036       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7037       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7038       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7039                                        CGF.getContext().getSizeType(),
7040                                        OAE->getLowerBound()->getExprLoc());
7041       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7042       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7043       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7044       LengthVal = CGF.Builder.CreateSelect(
7045           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7046       return LengthVal;
7047     }
7048     return CGF.getTypeSize(ExprTy);
7049   }
7050 
7051   /// Return the corresponding bits for a given map clause modifier. Add
7052   /// a flag marking the map as a pointer if requested. Add a flag marking the
7053   /// map as the first one of a series of maps that relate to the same map
7054   /// expression.
7055   OpenMPOffloadMappingFlags getMapTypeBits(
7056       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7057       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7058       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7059     OpenMPOffloadMappingFlags Bits =
7060         IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7061                    : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7062     switch (MapType) {
7063     case OMPC_MAP_alloc:
7064     case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two map types.
7069       break;
7070     case OMPC_MAP_to:
7071       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7072       break;
7073     case OMPC_MAP_from:
7074       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7075       break;
7076     case OMPC_MAP_tofrom:
7077       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7078               OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7079       break;
7080     case OMPC_MAP_delete:
7081       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7082       break;
7083     case OMPC_MAP_unknown:
7084       llvm_unreachable("Unexpected map type!");
7085     }
7086     if (AddPtrFlag)
7087       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7088     if (AddIsTargetParamFlag)
7089       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7090     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7091       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7092     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7093       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7094     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7095         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7096       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7097     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7098       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7099     if (IsNonContiguous)
7100       Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7101     return Bits;
7102   }
7103 
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
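  /// E.g. a[0:2] is final (constant length 2), while a[3] and a[0:1] are not.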
7106   bool isFinalArraySectionExpression(const Expr *E) const {
7107     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7108 
7109     // It is not an array section and therefore not a unity-size one.
7110     if (!OASE)
7111       return false;
7112 
    // An array section with no colon always refers to a single element.
7114     if (OASE->getColonLocFirst().isInvalid())
7115       return false;
7116 
7117     const Expr *Length = OASE->getLength();
7118 
7119     // If we don't have a length we have to check if the array has size 1
7120     // for this dimension. Also, we should always expect a length if the
7121     // base type is pointer.
7122     if (!Length) {
7123       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7124                              OASE->getBase()->IgnoreParenImpCasts())
7125                              .getCanonicalType();
7126       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7127         return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
7131       return true;
7132     }
7133 
7134     // Check if the length evaluates to 1.
7135     Expr::EvalResult Result;
7136     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.
7138 
7139     llvm::APSInt ConstLength = Result.Val.getInt();
7140     return ConstLength.getSExtValue() != 1;
7141   }
7142 
7143   /// Generate the base pointers, section pointers, sizes, map type bits, and
7144   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7145   /// map type, map or motion modifiers, and expression components.
7146   /// \a IsFirstComponent should be set to true if the provided set of
7147   /// components is the first associated with a capture.
7148   void generateInfoForComponentList(
7149       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7150       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7151       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7152       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7153       bool IsFirstComponentList, bool IsImplicit,
7154       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7155       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7156       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7157           OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map of the
    // types declared below. The generated information is expressed in this
    // order:
7160     // base pointer, section pointer, size, flags
7161     // (to add to the ones that come from the map type and modifier).
7162     //
7163     // double d;
7164     // int i[100];
7165     // float *p;
7166     //
7167     // struct S1 {
7168     //   int i;
7169     //   float f[50];
7170     // }
7171     // struct S2 {
7172     //   int i;
7173     //   float f[50];
7174     //   S1 s;
7175     //   double *p;
7176     //   struct S2 *ps;
7177     //   int &ref;
7178     // }
7179     // S2 s;
7180     // S2 *ps;
7181     //
7182     // map(d)
7183     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7184     //
7185     // map(i)
7186     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7187     //
7188     // map(i[1:23])
7189     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7190     //
7191     // map(p)
7192     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7193     //
7194     // map(p[1:24])
7195     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7196     // in unified shared memory mode or for local pointers
7197     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7198     //
7199     // map(s)
7200     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7201     //
7202     // map(s.i)
7203     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7204     //
7205     // map(s.s.f)
7206     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7207     //
7208     // map(s.p)
7209     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7210     //
7211     // map(to: s.p[:22])
7212     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7213     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7214     // &(s.p), &(s.p[0]), 22*sizeof(double),
7215     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7216     // (*) alloc space for struct members, only this is a target parameter
7217     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7218     //      optimizes this entry out, same in the examples below)
7219     // (***) map the pointee (map: to)
7220     //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (**)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointee (map: to)
7228     //
7229     // map(s.ps)
7230     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7231     //
7232     // map(from: s.ps->s.i)
7233     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7234     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7235     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7236     //
7237     // map(to: s.ps->ps)
7238     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7239     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7240     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7241     //
7242     // map(s.ps->ps->ps)
7243     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7244     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7245     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7246     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7247     //
7248     // map(to: s.ps->ps->s.f[:22])
7249     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7250     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7251     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7252     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7253     //
7254     // map(ps)
7255     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7256     //
7257     // map(ps->i)
7258     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7259     //
7260     // map(ps->s.f)
7261     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7262     //
7263     // map(from: ps->p)
7264     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7265     //
7266     // map(to: ps->p[:22])
7267     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7268     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7269     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7270     //
7271     // map(ps->ps)
7272     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7273     //
7274     // map(from: ps->ps->s.i)
7275     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7276     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7277     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7278     //
7279     // map(from: ps->ps->ps)
7280     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7281     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7282     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7283     //
7284     // map(ps->ps->ps->ps)
7285     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7286     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7287     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7288     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7289     //
7290     // map(to: ps->ps->ps->s.f[:22])
7291     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7292     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7293     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7294     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7295     //
7296     // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7299     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7300     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7301     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7302     // (*) allocate contiguous space needed to fit all mapped members even if
7303     //     we allocate space for members not mapped (in this example,
7304     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7305     //     them as well because they fall between &s.f[0] and &s.p)
7306     //
7307     // map(from: s.f[:22]) map(to: ps->p[:33])
7308     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7309     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7310     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7311     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7312     // (*) the struct this entry pertains to is the 2nd element in the list of
7313     //     arguments, hence MEMBER_OF(2)
7314     //
7315     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7316     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7317     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7318     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7319     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7320     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7321     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7322     // (*) the struct this entry pertains to is the 4th element in the list
7323     //     of arguments, hence MEMBER_OF(4)
7324 
7325     // Track if the map information being generated is the first for a capture.
7326     bool IsCaptureFirstInfo = IsFirstComponentList;
7327     // When the variable is on a declare target link or in a to clause with
7328     // unified memory, a reference is needed to hold the host/device address
7329     // of the variable.
7330     bool RequiresReference = false;
7331 
7332     // Scan the components from the base to the complete expression.
7333     auto CI = Components.rbegin();
7334     auto CE = Components.rend();
7335     auto I = CI;
7336 
7337     // Track if the map information being generated is the first for a list of
7338     // components.
7339     bool IsExpressionFirstInfo = true;
7340     bool FirstPointerInComplexData = false;
7341     Address BP = Address::invalid();
7342     const Expr *AssocExpr = I->getAssociatedExpression();
7343     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7344     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7345     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7346 
7347     if (isa<MemberExpr>(AssocExpr)) {
7348       // The base is the 'this' pointer. The content of the pointer is going
7349       // to be the base of the field being mapped.
7350       BP = CGF.LoadCXXThisAddress();
7351     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7352                (OASE &&
7353                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7354       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7355     } else if (OAShE &&
7356                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7357       BP = Address(
7358           CGF.EmitScalarExpr(OAShE->getBase()),
7359           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7360           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7361     } else {
7362       // The base is the reference to the variable.
7363       // BP = &Var.
7364       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7365       if (const auto *VD =
7366               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7367         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7368                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7369           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7370               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7371                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7372                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7373             RequiresReference = true;
7374             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7375           }
7376         }
7377       }
7378 
7379       // If the variable is a pointer and is being dereferenced (i.e. is not
7380       // the last component), the base has to be the pointer itself, not its
7381       // reference. References are ignored for mapping purposes.
7382       QualType Ty =
7383           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7384       if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
7388         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7389         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7390             !VD || VD->hasLocalStorage())
7391           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7392         else
7393           FirstPointerInComplexData = true;
7394         ++I;
7395       }
7396     }
7397 
7398     // Track whether a component of the list should be marked as MEMBER_OF some
7399     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so it is MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2), which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
7409     // The variable is initialized to false so that PTR_AND_OBJ entries which
7410     // are not struct members are not considered (e.g. array of pointers to
7411     // data).
7412     bool ShouldBeMemberOf = false;
7413 
7414     // Variable keeping track of whether or not we have encountered a component
7415     // in the component list which is a member expression. Useful when we have a
7416     // pointer or a final array section, in which case it is the previous
7417     // component in the list which tells us whether we have a member expression.
7418     // E.g. X.f[:]
7419     // While processing the final array section "[:]" it is "f" which tells us
7420     // whether we are dealing with a member of a declared struct.
7421     const MemberExpr *EncounteredME = nullptr;
7422 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7425     uint64_t DimSize = 1;
7426 
7427     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7428     bool IsPrevMemberReference = false;
7429 
7430     for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct), mark
      // it.
7432       if (!EncounteredME) {
7433         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7434         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7435         // as MEMBER_OF the parent struct.
7436         if (EncounteredME) {
7437           ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
7440           if (FirstPointerInComplexData) {
7441             QualType Ty = std::prev(I)
7442                               ->getAssociatedDeclaration()
7443                               ->getType()
7444                               .getNonReferenceType();
7445             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7446             FirstPointerInComplexData = false;
7447           }
7448         }
7449       }
7450 
7451       auto Next = std::next(I);
7452 
      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array section
      // whose length can't be proven to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proven to be one.
      // If the map item is non-contiguous, then we don't treat any array
      // section as a final array section.
7461       bool IsFinalArraySection =
7462           !IsNonContiguous &&
7463           isFinalArraySectionExpression(I->getAssociatedExpression());
7464 
      // If we have a declaration for the mapping, use that; otherwise use
      // the base declaration of the map clause.
7467       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7468                                      ? I->getAssociatedDeclaration()
7469                                      : BaseDecl;
7470       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7471                                                : MapExpr;
7472 
      // Get information on whether the element is a pointer. Array sections
      // need special treatment, given that they are built-in types.
7476       const auto *OASE =
7477           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7478       const auto *OAShE =
7479           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7480       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7481       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7482       bool IsPointer =
7483           OAShE ||
7484           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7485                        .getCanonicalType()
7486                        ->isAnyPointerType()) ||
7487           I->getAssociatedExpression()->getType()->isAnyPointerType();
7488       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7489                                MapDecl &&
7490                                MapDecl->getType()->isLValueReferenceType();
7491       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7492 
7493       if (OASE)
7494         ++DimSize;
7495 
7496       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7497           IsFinalArraySection) {
7498         // If this is not the last component, we expect the pointer to be
7499         // associated with an array expression or member expression.
7500         assert((Next == CE ||
7501                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7502                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7503                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7504                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7505                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7506                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7507                "Unexpected expression");
7508 
7509         Address LB = Address::invalid();
7510         Address LowestElem = Address::invalid();
7511         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7512                                        const MemberExpr *E) {
7513           const Expr *BaseExpr = E->getBase();
7514           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7515           // scalar.
7516           LValue BaseLV;
7517           if (E->isArrow()) {
7518             LValueBaseInfo BaseInfo;
7519             TBAAAccessInfo TBAAInfo;
7520             Address Addr =
7521                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7522             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7523             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7524           } else {
7525             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7526           }
7527           return BaseLV;
7528         };
7529         if (OAShE) {
7530           LowestElem = LB =
7531               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7532                       CGF.ConvertTypeForMem(
7533                           OAShE->getBase()->getType()->getPointeeType()),
7534                       CGF.getContext().getTypeAlignInChars(
7535                           OAShE->getBase()->getType()));
7536         } else if (IsMemberReference) {
7537           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7538           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7539           LowestElem = CGF.EmitLValueForFieldInitialization(
7540                               BaseLVal, cast<FieldDecl>(MapDecl))
7541                            .getAddress(CGF);
7542           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7543                    .getAddress(CGF);
7544         } else {
7545           LowestElem = LB =
7546               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7547                   .getAddress(CGF);
7548         }
7549 
        // If this component is a pointer inside the base struct, then we don't
        // need to create any entry for it; it will be combined with the object
        // it points to into a single PTR_AND_OBJ entry.
7553         bool IsMemberPointerOrAddr =
7554             EncounteredME &&
7555             (((IsPointer || ForDeviceAddr) &&
7556               I->getAssociatedExpression() == EncounteredME) ||
7557              (IsPrevMemberReference && !IsPointer) ||
7558              (IsMemberReference && Next != CE &&
7559               !Next->getAssociatedExpression()->getType()->isPointerType()));
7560         if (!OverlappedElements.empty() && Next == CE) {
7561           // Handle base element with the info for overlapped elements.
7562           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7563           assert(!IsPointer &&
7564                  "Unexpected base element with the pointer type.");
7565           // Mark the whole struct as the struct that requires allocation on the
7566           // device.
7567           PartialStruct.LowestElem = {0, LowestElem};
7568           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7569               I->getAssociatedExpression()->getType());
7570           Address HB = CGF.Builder.CreateConstGEP(
7571               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7572                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7573               TypeSize.getQuantity() - 1);
7574           PartialStruct.HighestElem = {
7575               std::numeric_limits<decltype(
7576                   PartialStruct.HighestElem.first)>::max(),
7577               HB};
7578           PartialStruct.Base = BP;
7579           PartialStruct.LB = LB;
7580           assert(
7581               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7582               "Overlapped elements must be used only once for the variable.");
7583           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit entries for the non-overlapped data.
7585           OpenMPOffloadMappingFlags Flags =
7586               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7587               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7588                              /*AddPtrFlag=*/false,
7589                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7590           llvm::Value *Size = nullptr;
          // Do a bitcopy of all non-overlapped structure elements.
7592           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7593                    Component : OverlappedElements) {
7594             Address ComponentLB = Address::invalid();
7595             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7596                  Component) {
7597               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7598                 const auto *FD = dyn_cast<FieldDecl>(VD);
7599                 if (FD && FD->getType()->isLValueReferenceType()) {
7600                   const auto *ME =
7601                       cast<MemberExpr>(MC.getAssociatedExpression());
7602                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7603                   ComponentLB =
7604                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7605                           .getAddress(CGF);
7606                 } else {
7607                   ComponentLB =
7608                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7609                           .getAddress(CGF);
7610                 }
7611                 Size = CGF.Builder.CreatePtrDiff(
7612                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7613                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7614                 break;
7615               }
7616             }
7617             assert(Size && "Failed to determine structure size");
7618             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7619             CombinedInfo.BasePointers.push_back(BP.getPointer());
7620             CombinedInfo.Pointers.push_back(LB.getPointer());
7621             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7622                 Size, CGF.Int64Ty, /*isSigned=*/true));
7623             CombinedInfo.Types.push_back(Flags);
7624             CombinedInfo.Mappers.push_back(nullptr);
7625             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7626                                                                       : 1);
7627             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7628           }
7629           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7630           CombinedInfo.BasePointers.push_back(BP.getPointer());
7631           CombinedInfo.Pointers.push_back(LB.getPointer());
7632           Size = CGF.Builder.CreatePtrDiff(
7633               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7634               CGF.EmitCastToVoidPtr(LB.getPointer()));
7635           CombinedInfo.Sizes.push_back(
7636               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7637           CombinedInfo.Types.push_back(Flags);
7638           CombinedInfo.Mappers.push_back(nullptr);
7639           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7640                                                                     : 1);
7641           break;
7642         }
7643         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7644         if (!IsMemberPointerOrAddr ||
7645             (Next == CE && MapType != OMPC_MAP_unknown)) {
7646           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7647           CombinedInfo.BasePointers.push_back(BP.getPointer());
7648           CombinedInfo.Pointers.push_back(LB.getPointer());
7649           CombinedInfo.Sizes.push_back(
7650               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7651           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7652                                                                     : 1);
7653 
7654           // If Mapper is valid, the last component inherits the mapper.
7655           bool HasMapper = Mapper && Next == CE;
7656           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7657 
7658           // We need to add a pointer flag for each map that comes from the
7659           // same expression except for the first one. We also need to signal
          // this map is the first one that relates to the current capture
7661           // (there is a set of entries for each capture).
7662           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7663               MapType, MapModifiers, MotionModifiers, IsImplicit,
7664               !IsExpressionFirstInfo || RequiresReference ||
7665                   FirstPointerInComplexData || IsMemberReference,
7666               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7667 
7668           if (!IsExpressionFirstInfo || IsMemberReference) {
7669             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7670             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7671             if (IsPointer || (IsMemberReference && Next != CE))
7672               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7673                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7674                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7675                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7676                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7677 
7678             if (ShouldBeMemberOf) {
7679               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7680               // should be later updated with the correct value of MEMBER_OF.
7681               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7682               // From now on, all subsequent PTR_AND_OBJ entries should not be
7683               // marked as MEMBER_OF.
7684               ShouldBeMemberOf = false;
7685             }
7686           }
7687 
7688           CombinedInfo.Types.push_back(Flags);
7689         }
7690 
7691         // If we have encountered a member expression so far, keep track of the
7692         // mapped member. If the parent is "*this", then the value declaration
7693         // is nullptr.
7694         if (EncounteredME) {
7695           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7696           unsigned FieldIndex = FD->getFieldIndex();
7697 
7698           // Update info about the lowest and highest elements for this struct
7699           if (!PartialStruct.Base.isValid()) {
7700             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7701             if (IsFinalArraySection) {
7702               Address HB =
7703                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7704                       .getAddress(CGF);
7705               PartialStruct.HighestElem = {FieldIndex, HB};
7706             } else {
7707               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7708             }
7709             PartialStruct.Base = BP;
7710             PartialStruct.LB = BP;
7711           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7712             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7713           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7714             PartialStruct.HighestElem = {FieldIndex, LowestElem};
7715           }
7716         }
7717 
7718         // Need to emit combined struct for array sections.
7719         if (IsFinalArraySection || IsNonContiguous)
7720           PartialStruct.IsArraySection = true;
7721 
7722         // If we have a final array section, we are done with this expression.
7723         if (IsFinalArraySection)
7724           break;
7725 
7726         // The pointer becomes the base for the next element.
7727         if (Next != CE)
7728           BP = IsMemberReference ? LowestElem : LB;
7729 
7730         IsExpressionFirstInfo = false;
7731         IsCaptureFirstInfo = false;
7732         FirstPointerInComplexData = false;
7733         IsPrevMemberReference = IsMemberReference;
7734       } else if (FirstPointerInComplexData) {
7735         QualType Ty = Components.rbegin()
7736                           ->getAssociatedDeclaration()
7737                           ->getType()
7738                           .getNonReferenceType();
7739         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7740         FirstPointerInComplexData = false;
7741       }
7742     }
    // If we ran over the whole component list without encountering a member
    // expression, allocate space for the whole record.
7745     if (!EncounteredME)
7746       PartialStruct.HasCompleteRecord = true;
7747 
7748     if (!IsNonContiguous)
7749       return;
7750 
7751     const ASTContext &Context = CGF.getContext();
7752 
    // To support strides in array sections, we need to initialize the first
    // dimension size as 1, the first offset as 0, and the first count as 1.
7755     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7756     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7757     MapValuesArrayTy CurStrides;
7758     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7759     uint64_t ElementTypeSize;
7760 
    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
7764     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7765          Components) {
7766       const Expr *AssocExpr = Component.getAssociatedExpression();
7767       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7768 
7769       if (!OASE)
7770         continue;
7771 
7772       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7773       auto *CAT = Context.getAsConstantArrayType(Ty);
7774       auto *VAT = Context.getAsVariableArrayType(Ty);
7775 
      // We need all the dimension sizes except for the last dimension.
7777       assert((VAT || CAT || &Component == &*Components.begin()) &&
7778              "Should be either ConstantArray or VariableArray if not the "
7779              "first Component");
7780 
7781       // Get element size if CurStrides is empty.
7782       if (CurStrides.empty()) {
7783         const Type *ElementType = nullptr;
7784         if (CAT)
7785           ElementType = CAT->getElementType().getTypePtr();
7786         else if (VAT)
7787           ElementType = VAT->getElementType().getTypePtr();
7788         else
7789           assert(&Component == &*Components.begin() &&
7790                  "Only expect pointer (non CAT or VAT) when this is the "
7791                  "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // on the next iteration.
7795         if (ElementType) {
          // When the base is a pointer, we need to remove one level of
          // indirection.
7798           if (&Component != &*Components.begin())
7799             ElementType = ElementType->getPointeeOrArrayElementType();
7800           ElementTypeSize =
7801               Context.getTypeSizeInChars(ElementType).getQuantity();
7802           CurStrides.push_back(
7803               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7804         }
7805       }
      // Get the dimension value, except for the last dimension, since we
      // don't need it.
7808       if (DimSizes.size() < Components.size() - 1) {
7809         if (CAT)
7810           DimSizes.push_back(llvm::ConstantInt::get(
7811               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7812         else if (VAT)
7813           DimSizes.push_back(CGF.Builder.CreateIntCast(
7814               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7815               /*IsSigned=*/false));
7816       }
7817     }
7818 
    // Skip the dummy dimension since we already have its information.
7820     auto *DI = DimSizes.begin() + 1;
    // Running product of the dimension sizes, scaled by the element size.
7822     llvm::Value *DimProd =
7823         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7824 
    // Collect info for the non-contiguous case. Notice that offset, count,
    // and stride are only meaningful for array sections, so we insert a null
    // for anything other than an array section.
    // Also, the sizes of the offset, count, and stride arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims. Instead, they
    // equal the number of non-contiguous declarations in the target update
    // to/from clause.
7832     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7833          Components) {
7834       const Expr *AssocExpr = Component.getAssociatedExpression();
7835 
7836       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7837         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7838             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7839             /*isSigned=*/false);
7840         CurOffsets.push_back(Offset);
7841         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7842         CurStrides.push_back(CurStrides.back());
7843         continue;
7844       }
7845 
7846       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7847 
7848       if (!OASE)
7849         continue;
7850 
7851       // Offset
7852       const Expr *OffsetExpr = OASE->getLowerBound();
7853       llvm::Value *Offset = nullptr;
7854       if (!OffsetExpr) {
7855         // If offset is absent, then we just set it to zero.
7856         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7857       } else {
7858         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7859                                            CGF.Int64Ty,
7860                                            /*isSigned=*/false);
7861       }
7862       CurOffsets.push_back(Offset);
7863 
7864       // Count
7865       const Expr *CountExpr = OASE->getLength();
7866       llvm::Value *Count = nullptr;
7867       if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections as well. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension as
        // an array section even though it is not in array-section form
        // according to the spec.
7872         if (!OASE->getColonLocFirst().isValid() &&
7873             !OASE->getColonLocSecond().isValid()) {
7874           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7875         } else {
7876           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7877           // When the length is absent it defaults to ⌈(size −
7878           // lower-bound)/stride⌉, where size is the size of the array
7879           // dimension.
7880           const Expr *StrideExpr = OASE->getStride();
7881           llvm::Value *Stride =
7882               StrideExpr
7883                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7884                                               CGF.Int64Ty, /*isSigned=*/false)
7885                   : nullptr;
7886           if (Stride)
7887             Count = CGF.Builder.CreateUDiv(
7888                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7889           else
7890             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7891         }
7892       } else {
7893         Count = CGF.EmitScalarExpr(CountExpr);
7894       }
7895       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7896       CurCounts.push_back(Count);
7897 
7898       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7899       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7900       //              Offset      Count     Stride
7901       //    D0          0           1         4    (int)    <- dummy dimension
7902       //    D1          0           2         8    (2 * (1) * 4)
7903       //    D2          1           2         20   (1 * (1 * 5) * 4)
      //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
7905       const Expr *StrideExpr = OASE->getStride();
7906       llvm::Value *Stride =
7907           StrideExpr
7908               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7909                                           CGF.Int64Ty, /*isSigned=*/false)
7910               : nullptr;
7911       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7912       if (Stride)
7913         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7914       else
7915         CurStrides.push_back(DimProd);
7916       if (DI != DimSizes.end())
7917         ++DI;
7918     }
7919 
7920     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7921     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7922     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7923   }
7924 
7925   /// Return the adjusted map modifiers if the declaration a capture refers to
7926   /// appears in a first-private clause. This is expected to be used only with
7927   /// directives that start with 'target'.
7928   OpenMPOffloadMappingFlags
7929   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7930     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7931 
    // A first-private variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the captured
    // declaration is known as first-private in this handler.
7935     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7936       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7937         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7938                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7939       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7940              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7941     }
7942     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7943     if (I != LambdasMap.end())
      // For map(to: lambda), use the user-specified map type.
7945       return getMapTypeBits(
7946           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7947           /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7948           /*AddPtrFlag=*/false,
7949           /*AddIsTargetParamFlag=*/false,
7950           /*isNonContiguous=*/false);
7951     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7952            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7953   }
7954 
7955   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift left by getFlagMemberOffset() bits.
7957     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7958                                                   << getFlagMemberOffset());
7959   }
7960 
7961   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7962                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7963     // If the entry is PTR_AND_OBJ but has not been marked with the special
7964     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7965     // marked as MEMBER_OF.
7966     if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7967             Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
7968         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7969             (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
7970             OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
7971       return;
7972 
7973     // Reset the placeholder value to prepare the flag for the assignment of the
7974     // proper MEMBER_OF value.
7975     Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7976     Flags |= MemberOfFlag;
7977   }
7978 
7979   void getPlainLayout(const CXXRecordDecl *RD,
7980                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7981                       bool AsBase) const {
7982     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7983 
7984     llvm::StructType *St =
7985         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7986 
7987     unsigned NumElements = St->getNumElements();
7988     llvm::SmallVector<
7989         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7990         RecordLayout(NumElements);
7991 
7992     // Fill bases.
7993     for (const auto &I : RD->bases()) {
7994       if (I.isVirtual())
7995         continue;
7996       const auto *Base = I.getType()->getAsCXXRecordDecl();
7997       // Ignore empty bases.
7998       if (Base->isEmpty() || CGF.getContext()
7999                                  .getASTRecordLayout(Base)
8000                                  .getNonVirtualSize()
8001                                  .isZero())
8002         continue;
8003 
8004       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8005       RecordLayout[FieldIndex] = Base;
8006     }
8007     // Fill in virtual bases.
8008     for (const auto &I : RD->vbases()) {
8009       const auto *Base = I.getType()->getAsCXXRecordDecl();
8010       // Ignore empty bases.
8011       if (Base->isEmpty())
8012         continue;
8013       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8014       if (RecordLayout[FieldIndex])
8015         continue;
8016       RecordLayout[FieldIndex] = Base;
8017     }
8018     // Fill in all the fields.
8019     assert(!RD->isUnion() && "Unexpected union.");
8020     for (const auto *Field : RD->fields()) {
      // Fill in the non-bitfields; bitfields and zero-size fields are not part
      // of the plain layout.
8023       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8024         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8025         RecordLayout[FieldIndex] = Field;
8026       }
8027     }
8028     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8029              &Data : RecordLayout) {
8030       if (Data.isNull())
8031         continue;
8032       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8033         getPlainLayout(Base, Layout, /*AsBase=*/true);
8034       else
8035         Layout.push_back(Data.get<const FieldDecl *>());
8036     }
8037   }
8038 
8039   /// Generate all the base pointers, section pointers, sizes, map types, and
8040   /// mappers for the extracted mappable expressions (all included in \a
8041   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8042   /// pair of the relevant declaration and index where it occurs is appended to
8043   /// the device pointers info array.
8044   void generateAllInfoForClauses(
8045       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8046       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8047           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate to the same
8049     // declaration in a single chunk so that we can generate the map flags
8050     // correctly. Therefore, we organize all lists in a map.
8051     enum MapKind { Present, Allocs, Other, Total };
8052     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8053                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8054         Info;
8055 
8056     // Helper function to fill the information map for the different supported
8057     // clauses.
8058     auto &&InfoGen =
8059         [&Info, &SkipVarSet](
8060             const ValueDecl *D, MapKind Kind,
8061             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8062             OpenMPMapClauseKind MapType,
8063             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8064             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8065             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8066             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8067           if (SkipVarSet.contains(D))
8068             return;
8069           auto It = Info.find(D);
8070           if (It == Info.end())
8071             It = Info
8072                      .insert(std::make_pair(
8073                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8074                      .first;
8075           It->second[Kind].emplace_back(
8076               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8077               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8078         };
8079 
8080     for (const auto *Cl : Clauses) {
8081       const auto *C = dyn_cast<OMPMapClause>(Cl);
8082       if (!C)
8083         continue;
8084       MapKind Kind = Other;
8085       if (llvm::is_contained(C->getMapTypeModifiers(),
8086                              OMPC_MAP_MODIFIER_present))
8087         Kind = Present;
8088       else if (C->getMapType() == OMPC_MAP_alloc)
8089         Kind = Allocs;
8090       const auto *EI = C->getVarRefs().begin();
8091       for (const auto L : C->component_lists()) {
8092         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8093         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8094                 C->getMapTypeModifiers(), std::nullopt,
8095                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8096                 E);
8097         ++EI;
8098       }
8099     }
8100     for (const auto *Cl : Clauses) {
8101       const auto *C = dyn_cast<OMPToClause>(Cl);
8102       if (!C)
8103         continue;
8104       MapKind Kind = Other;
8105       if (llvm::is_contained(C->getMotionModifiers(),
8106                              OMPC_MOTION_MODIFIER_present))
8107         Kind = Present;
8108       const auto *EI = C->getVarRefs().begin();
8109       for (const auto L : C->component_lists()) {
8110         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
8111                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8112                 C->isImplicit(), std::get<2>(L), *EI);
8113         ++EI;
8114       }
8115     }
8116     for (const auto *Cl : Clauses) {
8117       const auto *C = dyn_cast<OMPFromClause>(Cl);
8118       if (!C)
8119         continue;
8120       MapKind Kind = Other;
8121       if (llvm::is_contained(C->getMotionModifiers(),
8122                              OMPC_MOTION_MODIFIER_present))
8123         Kind = Present;
8124       const auto *EI = C->getVarRefs().begin();
8125       for (const auto L : C->component_lists()) {
8126         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
8127                 std::nullopt, C->getMotionModifiers(),
8128                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8129                 *EI);
8130         ++EI;
8131       }
8132     }
8133 
    // Look at the use_device_ptr and use_device_addr clause information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr or use_device_addr list, we create
    // one with map type 'alloc' and a zero-size section. It is the user's
    // fault if that was not mapped before. If there is no map information and
    // the pointer is a struct member, then we defer the emission of that
    // entry until the whole struct has been processed.
8141     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8142                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8143         DeferredInfo;
8144     MapCombinedInfoTy UseDeviceDataCombinedInfo;
8145 
8146     auto &&UseDeviceDataCombinedInfoGen =
8147         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8148                                      CodeGenFunction &CGF) {
8149           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8150           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8151           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8152           UseDeviceDataCombinedInfo.Sizes.push_back(
8153               llvm::Constant::getNullValue(CGF.Int64Ty));
8154           UseDeviceDataCombinedInfo.Types.push_back(
8155               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8156           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8157         };
8158 
8159     auto &&MapInfoGen =
8160         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8161          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8162                    OMPClauseMappableExprCommon::MappableExprComponentListRef
8163                        Components,
8164                    bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information, so we generate a
          // zero-size array section. If the pointer is a struct member, we
          // defer this action until the whole struct has been processed.
8168           if (isa<MemberExpr>(IE)) {
8169             // Insert the pointer into Info to be processed by
8170             // generateInfoForComponentList. Because it is a member pointer
8171             // without a pointee, no entry will be generated for it, therefore
8172             // we need to generate one after the whole struct has been
8173             // processed. Nonetheless, generateInfoForComponentList must be
8174             // called to take the pointer into account for the calculation of
8175             // the range of the partial struct.
8176             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
8177                     std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
8178                     nullptr, nullptr, IsDevAddr);
8179             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8180           } else {
8181             llvm::Value *Ptr;
8182             if (IsDevAddr) {
8183               if (IE->isGLValue())
8184                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8185               else
8186                 Ptr = CGF.EmitScalarExpr(IE);
8187             } else {
8188               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8189             }
8190             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
8191           }
8192         };
8193 
8194     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8195                                     const Expr *IE, bool IsDevAddr) -> bool {
8196       // We potentially have map information for this declaration already.
8197       // Look for the first set of components that refer to it. If found,
8198       // return true.
8199       // If the first component is a member expression, we have to look into
8200       // 'this', which maps to null in the map of map information. Otherwise
8201       // look directly for the information.
8202       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8203       if (It != Info.end()) {
8204         bool Found = false;
8205         for (auto &Data : It->second) {
8206           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8207             return MI.Components.back().getAssociatedDeclaration() == VD;
8208           });
8209           // If we found a map entry, signal that the pointer has to be
8210           // returned and move on to the next declaration. Exclude cases where
8211           // the base pointer is mapped as array subscript, array section or
8212           // array shaping. The base address is passed as a pointer to base in
8213           // this case and cannot be used as a base for use_device_ptr list
8214           // item.
8215           if (CI != Data.end()) {
8216             if (IsDevAddr) {
8217               CI->ReturnDevicePointer = true;
8218               Found = true;
8219               break;
8220             } else {
8221               auto PrevCI = std::next(CI->Components.rbegin());
8222               const auto *VarD = dyn_cast<VarDecl>(VD);
8223               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8224                   isa<MemberExpr>(IE) ||
8225                   !VD->getType().getNonReferenceType()->isPointerType() ||
8226                   PrevCI == CI->Components.rend() ||
8227                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8228                   VarD->hasLocalStorage()) {
8229                 CI->ReturnDevicePointer = true;
8230                 Found = true;
8231                 break;
8232               }
8233             }
8234           }
8235         }
8236         return Found;
8237       }
8238       return false;
8239     };
8240 
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and a zero-size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
8247     for (const auto *Cl : Clauses) {
8248       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8249       if (!C)
8250         continue;
8251       for (const auto L : C->component_lists()) {
8252         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8253             std::get<1>(L);
8254         assert(!Components.empty() &&
8255                "Not expecting empty list of components!");
8256         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8257         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8258         const Expr *IE = Components.back().getAssociatedExpression();
8259         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8260           continue;
8261         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8262                    /*IsDevAddr=*/false);
8263       }
8264     }
8265 
8266     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8267     for (const auto *Cl : Clauses) {
8268       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8269       if (!C)
8270         continue;
8271       for (const auto L : C->component_lists()) {
8272         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8273             std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8277         if (!Processed.insert(VD).second)
8278           continue;
8279         VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
8281         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8282           continue;
8283         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8284                    /*IsDevAddr=*/true);
8285       }
8286     }
8287 
8288     for (const auto &Data : Info) {
8289       StructRangeInfoTy PartialStruct;
8290       // Temporary generated information.
8291       MapCombinedInfoTy CurInfo;
8292       const Decl *D = Data.first;
8293       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8294       for (const auto &M : Data.second) {
8295         for (const MapInfo &L : M) {
8296           assert(!L.Components.empty() &&
8297                  "Not expecting declaration with no component lists.");
8298 
8299           // Remember the current base pointer index.
8300           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8301           CurInfo.NonContigInfo.IsNonContiguous =
8302               L.Components.back().isNonContiguous();
8303           generateInfoForComponentList(
8304               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8305               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8306               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8307 
          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
8310           if (L.ReturnDevicePointer) {
8311             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8312                    "Unexpected number of mapped base pointers.");
8313 
8314             const ValueDecl *RelevantVD =
8315                 L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related to the device pointer!");
8318 
8319             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8320                 RelevantVD);
8321             CurInfo.Types[CurrentBasePointersIdx] |=
8322                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8323           }
8324         }
8325       }
8326 
8327       // Append any pending zero-length pointers which are struct members and
8328       // used with use_device_ptr or use_device_addr.
8329       auto CI = DeferredInfo.find(Data.first);
8330       if (CI != DeferredInfo.end()) {
8331         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8332           llvm::Value *BasePtr;
8333           llvm::Value *Ptr;
8334           if (L.ForDeviceAddr) {
8335             if (L.IE->isGLValue())
8336               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8337             else
8338               Ptr = this->CGF.EmitScalarExpr(L.IE);
8339             BasePtr = Ptr;
8340             // Entry is RETURN_PARAM. Also, set the placeholder value
8341             // MEMBER_OF=FFFF so that the entry is later updated with the
8342             // correct value of MEMBER_OF.
8343             CurInfo.Types.push_back(
8344                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8345                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8346           } else {
8347             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8348             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8349                                              L.IE->getExprLoc());
8350             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8351             // placeholder value MEMBER_OF=FFFF so that the entry is later
8352             // updated with the correct value of MEMBER_OF.
8353             CurInfo.Types.push_back(
8354                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8355                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8356                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8357           }
8358           CurInfo.Exprs.push_back(L.VD);
8359           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8360           CurInfo.Pointers.push_back(Ptr);
8361           CurInfo.Sizes.push_back(
8362               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8363           CurInfo.Mappers.push_back(nullptr);
8364         }
8365       }
8366       // If there is an entry in PartialStruct it means we have a struct with
8367       // individual members mapped. Emit an extra combined entry.
8368       if (PartialStruct.Base.isValid()) {
8369         CurInfo.NonContigInfo.Dims.push_back(0);
8370         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8371       }
8372 
8373       // We need to append the results of this capture to what we already
8374       // have.
8375       CombinedInfo.append(CurInfo);
8376     }
8377     // Append data for use_device_ptr clauses.
8378     CombinedInfo.append(UseDeviceDataCombinedInfo);
8379   }
8380 
8381 public:
8382   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8383       : CurDir(&Dir), CGF(CGF) {
8384     // Extract firstprivate clause information.
8385     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8386       for (const auto *D : C->varlists())
8387         FirstPrivateDecls.try_emplace(
8388             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8389     // Extract implicit firstprivates from uses_allocators clauses.
8390     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8391       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8392         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8393         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8394           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8395                                         /*Implicit=*/true);
8396         else if (const auto *VD = dyn_cast<VarDecl>(
8397                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8398                          ->getDecl()))
8399           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8400       }
8401     }
8402     // Extract device pointer clause information.
8403     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8404       for (auto L : C->component_lists())
8405         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8406     // Extract device addr clause information.
8407     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8408       for (auto L : C->component_lists())
8409         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract information about lambdas mapped with the 'to' map type.
8411     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8412       if (C->getMapType() != OMPC_MAP_to)
8413         continue;
8414       for (auto L : C->component_lists()) {
8415         const ValueDecl *VD = std::get<0>(L);
8416         const auto *RD = VD ? VD->getType()
8417                                   .getCanonicalType()
8418                                   .getNonReferenceType()
8419                                   ->getAsCXXRecordDecl()
8420                             : nullptr;
8421         if (RD && RD->isLambda())
8422           LambdasMap.try_emplace(std::get<0>(L), C);
8423       }
8424     }
8425   }
8426 
8427   /// Constructor for the declare mapper directive.
8428   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8429       : CurDir(&Dir), CGF(CGF) {}
8430 
8431   /// Generate code for the combined entry if we have a partially mapped struct
8432   /// and take care of the mapping flags of the arguments corresponding to
8433   /// individual struct members.
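  ///
  /// For instance (an illustrative example), given
  /// \code
  ///   struct S { int a; double b[8]; } s;
  ///   #pragma omp target map(tofrom: s.a, s.b[0:4])
  /// \endcode
  /// a combined entry spanning from the lowest mapped member to one past the
  /// highest is emitted, and the individual member entries are rewritten to
  /// be MEMBER_OF that combined entry.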
8434   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8435                          MapFlagsArrayTy &CurTypes,
8436                          const StructRangeInfoTy &PartialStruct,
8437                          const ValueDecl *VD = nullptr,
8438                          bool NotTargetParams = true) const {
8439     if (CurTypes.size() == 1 &&
8440         ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8441          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8442         !PartialStruct.IsArraySection)
8443       return;
8444     Address LBAddr = PartialStruct.LowestElem.second;
8445     Address HBAddr = PartialStruct.HighestElem.second;
8446     if (PartialStruct.HasCompleteRecord) {
8447       LBAddr = PartialStruct.LB;
8448       HBAddr = PartialStruct.LB;
8449     }
8450     CombinedInfo.Exprs.push_back(VD);
8451     // Base is the base of the struct
8452     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8453     // Pointer is the address of the lowest element
8454     llvm::Value *LB = LBAddr.getPointer();
8455     const CXXMethodDecl *MD =
8456         CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8457     const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8458     bool HasBaseClass = RD ? RD->getNumBases() > 0 : false;
8459     // There should not be a mapper for a combined entry.
8460     if (HasBaseClass) {
8461       // OpenMP 5.2 148:21:
8462       // If the target construct is within a class non-static member function,
8463       // and a variable is an accessible data member of the object for which the
      // non-static member function is invoked, the variable is treated as
8465       // if the this[:1] expression had appeared in a map clause with a map-type
8466       // of tofrom.
8467       // Emit this[:1]
8468       CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8469       QualType Ty = MD->getThisType()->getPointeeType();
8470       llvm::Value *Size =
8471           CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8472                                     /*isSigned=*/true);
8473       CombinedInfo.Sizes.push_back(Size);
8474     } else {
8475       CombinedInfo.Pointers.push_back(LB);
8476       // Size is (addr of {highest+1} element) - (addr of lowest element)
8477       llvm::Value *HB = HBAddr.getPointer();
8478       llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8479           HBAddr.getElementType(), HB, /*Idx0=*/1);
8480       llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8481       llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8482       llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8483       llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8484                                                     /*isSigned=*/false);
8485       CombinedInfo.Sizes.push_back(Size);
8486     }
8487     CombinedInfo.Mappers.push_back(nullptr);
    // The map type is TARGET_PARAM only when we are generating info for
    // captures.
8489     CombinedInfo.Types.push_back(
8490         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8491                         : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8492     // If any element has the present modifier, then make sure the runtime
8493     // doesn't attempt to allocate the struct.
8494     if (CurTypes.end() !=
8495         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8496           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8497               Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8498         }))
8499       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8500     // Remove TARGET_PARAM flag from the first element
8501     (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8502     // If any element has the ompx_hold modifier, then make sure the runtime
8503     // uses the hold reference count for the struct as a whole so that it won't
8504     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8505     // elements as well so the runtime knows which reference count to check
8506     // when determining whether it's time for device-to-host transfers of
8507     // individual elements.
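    // For instance (an illustrative example), given
    //   #pragma omp target data map(ompx_hold, tofrom: s.a)
    // both the combined entry for 's' and each of its member entries carry
    // OMP_MAP_OMPX_HOLD.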
8508     if (CurTypes.end() !=
8509         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8510           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8511               Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8512         })) {
8513       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8514       for (auto &M : CurTypes)
8515         M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8516     }
8517 
8518     // All other current entries will be MEMBER_OF the combined entry
8519     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8520     // 0xFFFF in the MEMBER_OF field).
8521     OpenMPOffloadMappingFlags MemberOfFlag =
8522         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8523     for (auto &M : CurTypes)
8524       setCorrectMemberOfFlag(M, MemberOfFlag);
8525   }
8526 
8527   /// Generate all the base pointers, section pointers, sizes, map types, and
8528   /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
8530   /// pair of the relevant declaration and index where it occurs is appended to
8531   /// the device pointers info array.
8532   void generateAllInfo(
8533       MapCombinedInfoTy &CombinedInfo,
8534       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8535           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8536     assert(CurDir.is<const OMPExecutableDirective *>() &&
8537            "Expect a executable directive");
8538     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8539     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8540   }
8541 
8542   /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
8545   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8546     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8547            "Expect a declare mapper directive");
8548     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8549     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8550   }
8551 
8552   /// Emit capture info for lambdas for variables captured by reference.
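  ///
  /// For instance (an illustrative example), given
  /// \code
  ///   int x;
  ///   auto l = [&x]() { return x; };
  ///   #pragma omp target map(to: l)
  /// \endcode
  /// an extra PTR_AND_OBJ entry is emitted for the captured reference to 'x'
  /// so that the device copy of the lambda refers to the mapped 'x'.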
8553   void generateInfoForLambdaCaptures(
8554       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8555       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8556     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8557     const auto *RD = VDType->getAsCXXRecordDecl();
8558     if (!RD || !RD->isLambda())
8559       return;
8560     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8561                    CGF.getContext().getDeclAlign(VD));
8562     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8563     llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8564     FieldDecl *ThisCapture = nullptr;
8565     RD->getCaptureFields(Captures, ThisCapture);
8566     if (ThisCapture) {
8567       LValue ThisLVal =
8568           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8569       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8570       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8571                                  VDLVal.getPointer(CGF));
8572       CombinedInfo.Exprs.push_back(VD);
8573       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8574       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8575       CombinedInfo.Sizes.push_back(
8576           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8577                                     CGF.Int64Ty, /*isSigned=*/true));
8578       CombinedInfo.Types.push_back(
8579           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8580           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8581           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8582           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8583       CombinedInfo.Mappers.push_back(nullptr);
8584     }
8585     for (const LambdaCapture &LC : RD->captures()) {
8586       if (!LC.capturesVariable())
8587         continue;
8588       const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8589       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8590         continue;
8591       auto It = Captures.find(VD);
8592       assert(It != Captures.end() && "Found lambda capture without field.");
8593       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8594       if (LC.getCaptureKind() == LCK_ByRef) {
8595         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8596         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8597                                    VDLVal.getPointer(CGF));
8598         CombinedInfo.Exprs.push_back(VD);
8599         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8600         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8601         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8602             CGF.getTypeSize(
8603                 VD->getType().getCanonicalType().getNonReferenceType()),
8604             CGF.Int64Ty, /*isSigned=*/true));
8605       } else {
8606         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8607         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8608                                    VDLVal.getPointer(CGF));
8609         CombinedInfo.Exprs.push_back(VD);
8610         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8611         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8612         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8613       }
8614       CombinedInfo.Types.push_back(
8615           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8616           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8617           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8618           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8619       CombinedInfo.Mappers.push_back(nullptr);
8620     }
8621   }
8622 
  /// Set correct indices for lambda captures.
8624   void adjustMemberOfForLambdaCaptures(
8625       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8626       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8627       MapFlagsArrayTy &Types) const {
8628     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8629       // Set correct member_of idx for all implicit lambda captures.
8630       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8631                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8632                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8633                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8634         continue;
8635       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8636       assert(BasePtr && "Unable to find base lambda address.");
8637       int TgtIdx = -1;
8638       for (unsigned J = I; J > 0; --J) {
8639         unsigned Idx = J - 1;
8640         if (Pointers[Idx] != BasePtr)
8641           continue;
8642         TgtIdx = Idx;
8643         break;
8644       }
8645       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8646       // All other current entries will be MEMBER_OF the combined entry
8647       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8648       // 0xFFFF in the MEMBER_OF field).
8649       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8650       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8651     }
8652   }
8653 
8654   /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated with a given capture (all included in \a CombinedInfo).
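  ///
  /// For instance (an illustrative example), for the capture of 'a' in
  /// \code
  ///   int a[100];
  ///   #pragma omp target map(to: a[10:20])
  /// \endcode
  /// this emits the base pointer, section pointer, size, and map type
  /// corresponding to the 'a[10:20]' component list of the map clause.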
8656   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8657                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8658                               StructRangeInfoTy &PartialStruct) const {
8659     assert(!Cap->capturesVariableArrayType() &&
8660            "Not expecting to generate map info for a variable array type!");
8661 
    // We need to know when we are generating information for the first
    // component.
8663     const ValueDecl *VD = Cap->capturesThis()
8664                               ? nullptr
8665                               : Cap->getCapturedVar()->getCanonicalDecl();
8666 
    // For map(to: lambda): skip it here; it is processed in
    // generateDefaultMapInfo.
8669     if (LambdasMap.count(VD))
8670       return;
8671 
    // If this declaration appears in an is_device_ptr clause we just have to
8673     // pass the pointer by value. If it is a reference to a declaration, we just
8674     // pass its value.
8675     if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8676       CombinedInfo.Exprs.push_back(VD);
8677       CombinedInfo.BasePointers.emplace_back(Arg, VD);
8678       CombinedInfo.Pointers.push_back(Arg);
8679       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8680           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8681           /*isSigned=*/true));
8682       CombinedInfo.Types.push_back(
8683           (Cap->capturesVariable()
8684                ? OpenMPOffloadMappingFlags::OMP_MAP_TO
8685                : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) |
8686           OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8687       CombinedInfo.Mappers.push_back(nullptr);
8688       return;
8689     }
8690 
8691     using MapData =
8692         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8693                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8694                    const ValueDecl *, const Expr *>;
8695     SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating component info.
8698     static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8699     auto It = DevPointersMap.find(VD);
8700     if (It != DevPointersMap.end())
8701       for (const auto &MCL : It->second)
8702         DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
8704                                         nullptr);
8705     auto I = HasDevAddrsMap.find(VD);
8706     if (I != HasDevAddrsMap.end())
8707       for (const auto &MCL : I->second)
8708         DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
8710                                         nullptr);
8711     assert(CurDir.is<const OMPExecutableDirective *>() &&
8712            "Expect a executable directive");
8713     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8714     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8715       const auto *EI = C->getVarRefs().begin();
8716       for (const auto L : C->decl_component_lists(VD)) {
8717         const ValueDecl *VDecl, *Mapper;
        // The expression is not valid if the mapping is implicit.
8719         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8720         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8721         std::tie(VDecl, Components, Mapper) = L;
8722         assert(VDecl == VD && "We got information for the wrong declaration??");
8723         assert(!Components.empty() &&
8724                "Not expecting declaration with no component lists.");
8725         DeclComponentLists.emplace_back(Components, C->getMapType(),
8726                                         C->getMapTypeModifiers(),
8727                                         C->isImplicit(), Mapper, E);
8728         ++EI;
8729       }
8730     }
8731     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8732                                              const MapData &RHS) {
8733       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8734       OpenMPMapClauseKind MapType = std::get<1>(RHS);
8735       bool HasPresent =
8736           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8737       bool HasAllocs = MapType == OMPC_MAP_alloc;
8738       MapModifiers = std::get<2>(RHS);
8739       MapType = std::get<1>(LHS);
8740       bool HasPresentR =
8741           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8742       bool HasAllocsR = MapType == OMPC_MAP_alloc;
8743       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8744     });
8745 
8746     // Find overlapping elements (including the offset from the base element).
8747     llvm::SmallDenseMap<
8748         const MapData *,
8749         llvm::SmallVector<
8750             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8751         4>
8752         OverlappedData;
8753     size_t Count = 0;
8754     for (const MapData &L : DeclComponentLists) {
8755       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8756       OpenMPMapClauseKind MapType;
8757       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8758       bool IsImplicit;
8759       const ValueDecl *Mapper;
8760       const Expr *VarRef;
8761       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8762           L;
8763       ++Count;
8764       for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8765         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8766         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8767                  VarRef) = L1;
8768         auto CI = Components.rbegin();
8769         auto CE = Components.rend();
8770         auto SI = Components1.rbegin();
8771         auto SE = Components1.rend();
8772         for (; CI != CE && SI != SE; ++CI, ++SI) {
8773           if (CI->getAssociatedExpression()->getStmtClass() !=
8774               SI->getAssociatedExpression()->getStmtClass())
8775             break;
8776           // Are we dealing with different variables/fields?
8777           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8778             break;
8779         }
        // We have found an overlap if, for at least one of the lists, we
        // reached the head of its components list.
8782         if (CI == CE || SI == SE) {
8783           // Ignore it if it is the same component.
8784           if (CI == CE && SI == SE)
8785             continue;
8786           const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and the other one is some kind of
          // dereference of that pointer (array subscript, section,
          // dereference, etc.), they do not overlap.
          // Likewise, if one component is a base and the other component is a
          // dereferenced pointer MemberExpr with the same base.
8792           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8793               (std::prev(It)->getAssociatedDeclaration() &&
8794                std::prev(It)
8795                    ->getAssociatedDeclaration()
8796                    ->getType()
8797                    ->isPointerType()) ||
8798               (It->getAssociatedDeclaration() &&
8799                It->getAssociatedDeclaration()->getType()->isPointerType() &&
8800                std::next(It) != CE && std::next(It) != SE))
8801             continue;
8802           const MapData &BaseData = CI == CE ? L : L1;
8803           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8804               SI == SE ? Components : Components1;
8805           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8806           OverlappedElements.getSecond().push_back(SubData);
8807         }
8808       }
8809     }
8810     // Sort the overlapped elements for each item.
8811     llvm::SmallVector<const FieldDecl *, 4> Layout;
8812     if (!OverlappedData.empty()) {
8813       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8814       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8815       while (BaseType != OrigType) {
8816         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8817         OrigType = BaseType->getPointeeOrArrayElementType();
8818       }
8819 
8820       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8821         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8822       else {
8823         const auto *RD = BaseType->getAsRecordDecl();
8824         Layout.append(RD->field_begin(), RD->field_end());
8825       }
8826     }
8827     for (auto &Pair : OverlappedData) {
8828       llvm::stable_sort(
8829           Pair.getSecond(),
8830           [&Layout](
8831               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8832               OMPClauseMappableExprCommon::MappableExprComponentListRef
8833                   Second) {
8834             auto CI = First.rbegin();
8835             auto CE = First.rend();
8836             auto SI = Second.rbegin();
8837             auto SE = Second.rend();
8838             for (; CI != CE && SI != SE; ++CI, ++SI) {
8839               if (CI->getAssociatedExpression()->getStmtClass() !=
8840                   SI->getAssociatedExpression()->getStmtClass())
8841                 break;
8842               // Are we dealing with different variables/fields?
8843               if (CI->getAssociatedDeclaration() !=
8844                   SI->getAssociatedDeclaration())
8845                 break;
8846             }
8847 
8848             // Lists contain the same elements.
8849             if (CI == CE && SI == SE)
8850               return false;
8851 
            // A list with fewer elements is less than a list with more
            // elements.
8853             if (CI == CE || SI == SE)
8854               return CI == CE;
8855 
8856             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8857             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8858             if (FD1->getParent() == FD2->getParent())
8859               return FD1->getFieldIndex() < FD2->getFieldIndex();
8860             const auto *It =
8861                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8862                   return FD == FD1 || FD == FD2;
8863                 });
8864             return *It == FD1;
8865           });
8866     }
8867 
    // The mapping flags depend on whether the components are associated with
    // a capture. First, go through all of the elements that have overlapped
    // elements.
8870     bool IsFirstComponentList = true;
8871     for (const auto &Pair : OverlappedData) {
8872       const MapData &L = *Pair.getFirst();
8873       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8874       OpenMPMapClauseKind MapType;
8875       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8876       bool IsImplicit;
8877       const ValueDecl *Mapper;
8878       const Expr *VarRef;
8879       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8880           L;
8881       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8882           OverlappedComponents = Pair.getSecond();
8883       generateInfoForComponentList(
8884           MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8885           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8886           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8887       IsFirstComponentList = false;
8888     }
8889     // Go through other elements without overlapped elements.
8890     for (const MapData &L : DeclComponentLists) {
8891       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8892       OpenMPMapClauseKind MapType;
8893       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8894       bool IsImplicit;
8895       const ValueDecl *Mapper;
8896       const Expr *VarRef;
8897       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8898           L;
8899       auto It = OverlappedData.find(&L);
8900       if (It == OverlappedData.end())
8901         generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
8902                                      Components, CombinedInfo, PartialStruct,
8903                                      IsFirstComponentList, IsImplicit, Mapper,
8904                                      /*ForDeviceAddr=*/false, VD, VarRef);
8905       IsFirstComponentList = false;
8906     }
8907   }
8908 
8909   /// Generate the default map information for a given capture \a CI,
8910   /// record field declaration \a RI and captured value \a CV.
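  ///
  /// For instance (an illustrative example), in
  /// \code
  ///   int x; int *p;
  ///   #pragma omp target
  ///   { x += p[0]; }
  /// \endcode
  /// the by-copy capture of 'x' produces a LITERAL entry sized to 'int',
  /// while the by-copy capture of the pointer 'p' produces a zero-sized entry
  /// with no extra flags; both are marked TARGET_PARAM and IMPLICIT.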
8911   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8912                               const FieldDecl &RI, llvm::Value *CV,
8913                               MapCombinedInfoTy &CombinedInfo) const {
8914     bool IsImplicit = true;
8915     // Do the default mapping.
8916     if (CI.capturesThis()) {
8917       CombinedInfo.Exprs.push_back(nullptr);
8918       CombinedInfo.BasePointers.push_back(CV);
8919       CombinedInfo.Pointers.push_back(CV);
8920       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8921       CombinedInfo.Sizes.push_back(
8922           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8923                                     CGF.Int64Ty, /*isSigned=*/true));
8924       // Default map type.
8925       CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8926                                    OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8927     } else if (CI.capturesVariableByCopy()) {
8928       const VarDecl *VD = CI.getCapturedVar();
8929       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8930       CombinedInfo.BasePointers.push_back(CV);
8931       CombinedInfo.Pointers.push_back(CV);
8932       if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime those captures that are passed by
        // value and are not pointers.
8935         CombinedInfo.Types.push_back(
8936             OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8937         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8938             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8939       } else {
8940         // Pointers are implicitly mapped with a zero size and no flags
8941         // (other than first map that is added for all implicit maps).
8942         CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8943         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8944       }
8945       auto I = FirstPrivateDecls.find(VD);
8946       if (I != FirstPrivateDecls.end())
8947         IsImplicit = I->getSecond();
8948     } else {
8949       assert(CI.capturesVariable() && "Expected captured reference.");
8950       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8951       QualType ElementType = PtrTy->getPointeeType();
8952       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8953           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8954       // The default map type for a scalar/complex type is 'to' because by
8955       // default the value doesn't have to be retrieved. For an aggregate
8956       // type, the default is 'tofrom'.
8957       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8958       const VarDecl *VD = CI.getCapturedVar();
8959       auto I = FirstPrivateDecls.find(VD);
8960       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8961       CombinedInfo.BasePointers.push_back(CV);
8962       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8963         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8964             CV, ElementType, CGF.getContext().getDeclAlign(VD),
8965             AlignmentSource::Decl));
8966         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8967       } else {
8968         CombinedInfo.Pointers.push_back(CV);
8969       }
8970       if (I != FirstPrivateDecls.end())
8971         IsImplicit = I->getSecond();
8972     }
8973     // Every default map produces a single argument which is a target parameter.
8974     CombinedInfo.Types.back() |=
8975         OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8976 
8977     // Add flag stating this is an implicit map.
8978     if (IsImplicit)
8979       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8980 
8981     // No user-defined mapper for default mapping.
8982     CombinedInfo.Mappers.push_back(nullptr);
8983   }
8984 };
8985 } // anonymous namespace
8986 
8987 static void emitNonContiguousDescriptor(
8988     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8989     CGOpenMPRuntime::TargetDataInfo &Info) {
8990   CodeGenModule &CGM = CGF.CGM;
8991   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
8992       &NonContigInfo = CombinedInfo.NonContigInfo;
8993 
8994   // Build an array of struct descriptor_dim and then assign it to
8995   // offload_args.
8996   //
8997   // struct descriptor_dim {
8998   //  uint64_t offset;
8999   //  uint64_t count;
9000   //  uint64_t stride
9001   // };
9002   ASTContext &C = CGF.getContext();
9003   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9004   RecordDecl *RD;
9005   RD = C.buildImplicitRecord("descriptor_dim");
9006   RD->startDefinition();
9007   addFieldToRecordDecl(C, RD, Int64Ty);
9008   addFieldToRecordDecl(C, RD, Int64Ty);
9009   addFieldToRecordDecl(C, RD, Int64Ty);
9010   RD->completeDefinition();
9011   QualType DimTy = C.getRecordType(RD);
9012 
9013   enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components; however, the sizes of the offset, count, and
  // stride arrays equal the number of non-contiguous base declarations.
9017   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if the dimension size is 1, since it cannot be
    // non-contiguous.
9020     if (NonContigInfo.Dims[I] == 1)
9021       continue;
9022     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9023     QualType ArrayTy =
9024         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9025     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9026     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9027       unsigned RevIdx = EE - II - 1;
9028       LValue DimsLVal = CGF.MakeAddrLValue(
9029           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9030       // Offset
9031       LValue OffsetLVal = CGF.EmitLValueForField(
9032           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9033       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9034       // Count
9035       LValue CountLVal = CGF.EmitLValueForField(
9036           DimsLVal, *std::next(RD->field_begin(), CountFD));
9037       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9038       // Stride
9039       LValue StrideLVal = CGF.EmitLValueForField(
9040           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9041       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9042     }
9043     // args[I] = &dims
9044     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9045         DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9046     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9047         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9048         Info.RTArgs.PointersArray, 0, I);
9049     Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9050     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9051     ++L;
9052   }
9053 }
9054 
9055 // Try to extract the base declaration from a `this->x` expression if possible.
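// For instance, for 'this->x[0:n]' (an array section whose base is a member
// access) this returns the declaration of 'x'.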
9056 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9057   if (!E)
9058     return nullptr;
9059 
9060   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9061     if (const MemberExpr *ME =
9062             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9063       return ME->getMemberDecl();
9064   return nullptr;
9065 }
9066 
9067 /// Emit a string constant containing the names of the values mapped to the
9068 /// offloading runtime library.
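/// The string combines, in the usual ';'-separated ident_t encoding, the
/// presumed file name, the pretty-printed map expression (or the declaration
/// name), and the line and column of the mapping.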
9069 llvm::Constant *
9070 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9071                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9072 
9073   uint32_t SrcLocStrSize;
9074   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9075     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9076 
9077   SourceLocation Loc;
9078   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9079     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9080       Loc = VD->getLocation();
9081     else
9082       Loc = MapExprs.getMapExpr()->getExprLoc();
9083   } else {
9084     Loc = MapExprs.getMapDecl()->getLocation();
9085   }
9086 
9087   std::string ExprName;
9088   if (MapExprs.getMapExpr()) {
9089     PrintingPolicy P(CGF.getContext().getLangOpts());
9090     llvm::raw_string_ostream OS(ExprName);
9091     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9092     OS.flush();
9093   } else {
9094     ExprName = MapExprs.getMapDecl()->getNameAsString();
9095   }
9096 
9097   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9098   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9099                                          PLoc.getLine(), PLoc.getColumn(),
9100                                          SrcLocStrSize);
9101 }
9102 
9103 /// Emit the arrays used to pass the captures and map information to the
9104 /// offloading runtime library. If there is no map or capture information,
9105 /// return nullptr by reference.
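///
/// Concretely, this fills the .offload_baseptrs, .offload_ptrs,
/// .offload_sizes, and .offload_mappers arrays (plus the constant
/// .offload_maptypes and, when debug info is requested, .offload_mapnames
/// globals) that are later handed to the offloading runtime.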
9106 static void emitOffloadingArrays(
9107     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9108     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9109     bool IsNonContiguous = false) {
9110   CodeGenModule &CGM = CGF.CGM;
9111   ASTContext &Ctx = CGF.getContext();
9112 
9113   // Reset the array information.
9114   Info.clearArrayInfo();
9115   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9116 
9117   if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array can eventually be used.
9120 
9121     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9122     QualType PointerArrayType = Ctx.getConstantArrayType(
9123         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9124         /*IndexTypeQuals=*/0);
9125 
9126     Info.RTArgs.BasePointersArray =
9127         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9128     Info.RTArgs.PointersArray =
9129         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9130     Address MappersArray =
9131         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9132     Info.RTArgs.MappersArray = MappersArray.getPointer();
9133 
9134     // If we don't have any VLA types or other types that require runtime
9135     // evaluation, we can use a constant array for the map sizes, otherwise we
9136     // need to fill up the arrays as we do for the pointers.
9137     QualType Int64Ty =
9138         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9139     SmallVector<llvm::Constant *> ConstSizes(
9140         CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
9141     llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
9142     for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9143       if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
9144         if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
9145           if (IsNonContiguous &&
9146               static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9147                   CombinedInfo.Types[I] &
9148                   OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9149             ConstSizes[I] = llvm::ConstantInt::get(
9150                 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
9151           else
9152             ConstSizes[I] = CI;
9153           continue;
9154         }
9155       }
9156       RuntimeSizes.set(I);
9157     }
9158 
9159     if (RuntimeSizes.all()) {
9160       QualType SizeArrayType = Ctx.getConstantArrayType(
9161           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9162           /*IndexTypeQuals=*/0);
9163       Info.RTArgs.SizesArray =
9164           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9165     } else {
9166       auto *SizesArrayInit = llvm::ConstantArray::get(
9167           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9168       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9169       auto *SizesArrayGbl = new llvm::GlobalVariable(
9170           CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
9171           llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
9172       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9173       if (RuntimeSizes.any()) {
9174         QualType SizeArrayType = Ctx.getConstantArrayType(
9175             Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9176             /*IndexTypeQuals=*/0);
9177         Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
9178         llvm::Value *GblConstPtr =
9179             CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9180                 SizesArrayGbl, CGM.Int64Ty->getPointerTo());
9181         CGF.Builder.CreateMemCpy(
9182             Buffer,
9183             Address(GblConstPtr, CGM.Int64Ty,
9184                     CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
9185                         /*DestWidth=*/64, /*Signed=*/false))),
9186             CGF.getTypeSize(SizeArrayType));
9187         Info.RTArgs.SizesArray = Buffer.getPointer();
9188       } else {
9189         Info.RTArgs.SizesArray = SizesArrayGbl;
9190       }
9191     }
9192 
9193     // The map types are always constant so we don't need to generate code to
9194     // fill arrays. Instead, we create an array constant.
9195     SmallVector<uint64_t, 4> Mapping;
9196     for (auto mapFlag : CombinedInfo.Types)
9197       Mapping.push_back(
9198           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9199               mapFlag));
9200     std::string MaptypesName =
9201         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9202     auto *MapTypesArrayGbl =
9203         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9204     Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9205 
    // The names array is only built if debug information is requested.
9208     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9209       Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue(
9210           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9211     } else {
9212       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9213         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9214       };
9215       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9216       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9217       std::string MapnamesName =
9218           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9219       auto *MapNamesArrayGbl =
9220           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9221       Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9222     }
9223 
9224     // If there's a present map type modifier, it must not be applied to the end
9225     // of a region, so generate a separate map type array in that case.
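    // For instance (an illustrative example), for
    //   #pragma omp target data map(present, to: x)
    // the presence of 'x' must be checked when the region begins but must not
    // be required when it ends, so the end-of-region map types have PRESENT
    // cleared.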
9226     if (Info.separateBeginEndCalls()) {
9227       bool EndMapTypesDiffer = false;
9228       for (uint64_t &Type : Mapping) {
9229         if (Type &
9230             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9231                 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9232           Type &=
9233               ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9234                   OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9235           EndMapTypesDiffer = true;
9236         }
9237       }
9238       if (EndMapTypesDiffer) {
9239         MapTypesArrayGbl =
9240             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9241         Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9242       }
9243     }
9244 
9245     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9246       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9247       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9248           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9249           Info.RTArgs.BasePointersArray, 0, I);
9250       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9251           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9252       Address BPAddr(BP, BPVal->getType(),
9253                      Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9254       CGF.Builder.CreateStore(BPVal, BPAddr);
9255 
9256       if (Info.requiresDevicePointerInfo())
9257         if (const ValueDecl *DevVD =
9258                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9259           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9260 
9261       llvm::Value *PVal = CombinedInfo.Pointers[I];
9262       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9263           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9264           Info.RTArgs.PointersArray, 0, I);
9265       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9266           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9267       Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9268       CGF.Builder.CreateStore(PVal, PAddr);
9269 
9270       if (RuntimeSizes.test(I)) {
9271         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9272             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9273             Info.RTArgs.SizesArray,
9274             /*Idx0=*/0,
9275             /*Idx1=*/I);
9276         Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
9277         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9278                                                           CGM.Int64Ty,
9279                                                           /*isSigned=*/true),
9280                                 SAddr);
9281       }
9282 
9283       // Fill up the mapper array.
9284       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9285       if (CombinedInfo.Mappers[I]) {
9286         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9287             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9288         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9289         Info.HasMapper = true;
9290       }
9291       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9292       CGF.Builder.CreateStore(MFunc, MAddr);
9293     }
9294   }
9295 
9296   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9297       Info.NumberOfPtrs == 0)
9298     return;
9299 
9300   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9301 }
9302 
9303 /// Check for inner distribute directive.
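/// For instance (an illustrative example), for
/// \code
///   #pragma omp target
///   #pragma omp teams
///   #pragma omp distribute
///   for (int i = 0; i < n; ++i) {}
/// \endcode
/// this returns the nested 'distribute' directive.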
9304 static const OMPExecutableDirective *
9305 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9306   const auto *CS = D.getInnermostCapturedStmt();
9307   const auto *Body =
9308       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9309   const Stmt *ChildStmt =
      CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
9311 
9312   if (const auto *NestedDir =
9313           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9314     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9315     switch (D.getDirectiveKind()) {
9316     case OMPD_target:
9317       if (isOpenMPDistributeDirective(DKind))
9318         return NestedDir;
9319       if (DKind == OMPD_teams) {
9320         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9321             /*IgnoreCaptured=*/true);
9322         if (!Body)
9323           return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
9325         if (const auto *NND =
9326                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9327           DKind = NND->getDirectiveKind();
9328           if (isOpenMPDistributeDirective(DKind))
9329             return NND;
9330         }
9331       }
9332       return nullptr;
9333     case OMPD_target_teams:
9334       if (isOpenMPDistributeDirective(DKind))
9335         return NestedDir;
9336       return nullptr;
9337     case OMPD_target_parallel:
9338     case OMPD_target_simd:
9339     case OMPD_target_parallel_for:
9340     case OMPD_target_parallel_for_simd:
9341       return nullptr;
9342     case OMPD_target_teams_distribute:
9343     case OMPD_target_teams_distribute_simd:
9344     case OMPD_target_teams_distribute_parallel_for:
9345     case OMPD_target_teams_distribute_parallel_for_simd:
9346     case OMPD_parallel:
9347     case OMPD_for:
9348     case OMPD_parallel_for:
9349     case OMPD_parallel_master:
9350     case OMPD_parallel_sections:
9351     case OMPD_for_simd:
9352     case OMPD_parallel_for_simd:
9353     case OMPD_cancel:
9354     case OMPD_cancellation_point:
9355     case OMPD_ordered:
9356     case OMPD_threadprivate:
9357     case OMPD_allocate:
9358     case OMPD_task:
9359     case OMPD_simd:
9360     case OMPD_tile:
9361     case OMPD_unroll:
9362     case OMPD_sections:
9363     case OMPD_section:
9364     case OMPD_single:
9365     case OMPD_master:
9366     case OMPD_critical:
9367     case OMPD_taskyield:
9368     case OMPD_barrier:
9369     case OMPD_taskwait:
9370     case OMPD_taskgroup:
9371     case OMPD_atomic:
9372     case OMPD_flush:
9373     case OMPD_depobj:
9374     case OMPD_scan:
9375     case OMPD_teams:
9376     case OMPD_target_data:
9377     case OMPD_target_exit_data:
9378     case OMPD_target_enter_data:
9379     case OMPD_distribute:
9380     case OMPD_distribute_simd:
9381     case OMPD_distribute_parallel_for:
9382     case OMPD_distribute_parallel_for_simd:
9383     case OMPD_teams_distribute:
9384     case OMPD_teams_distribute_simd:
9385     case OMPD_teams_distribute_parallel_for:
9386     case OMPD_teams_distribute_parallel_for_simd:
9387     case OMPD_target_update:
9388     case OMPD_declare_simd:
9389     case OMPD_declare_variant:
9390     case OMPD_begin_declare_variant:
9391     case OMPD_end_declare_variant:
9392     case OMPD_declare_target:
9393     case OMPD_end_declare_target:
9394     case OMPD_declare_reduction:
9395     case OMPD_declare_mapper:
9396     case OMPD_taskloop:
9397     case OMPD_taskloop_simd:
9398     case OMPD_master_taskloop:
9399     case OMPD_master_taskloop_simd:
9400     case OMPD_parallel_master_taskloop:
9401     case OMPD_parallel_master_taskloop_simd:
9402     case OMPD_requires:
9403     case OMPD_metadirective:
9404     case OMPD_unknown:
9405     default:
9406       llvm_unreachable("Unexpected directive.");
9407     }
9408   }
9409 
9410   return nullptr;
9411 }
9412 
9413 /// Emit the user-defined mapper function. The code generation follows the
9414 /// pattern in the example below.
9415 /// \code
9416 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9417 ///                                           void *base, void *begin,
9418 ///                                           int64_t size, int64_t type,
9419 ///                                           void *name = nullptr) {
9420 ///   // Allocate space for an array section first or add a base/begin for
9421 ///   // pointer dereference.
9422 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9423 ///       !maptype.IsDelete)
9424 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9425 ///                                 size*sizeof(Ty), clearToFromMember(type));
9426 ///   // Map members.
9427 ///   for (unsigned i = 0; i < size; i++) {
9428 ///     // For each component specified by this mapper:
9429 ///     for (auto c : begin[i]->all_components) {
9430 ///       if (c.hasMapper())
9431 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9432 ///                       c.arg_type, c.arg_name);
9433 ///       else
9434 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9435 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9436 ///                                     c.arg_name);
9437 ///     }
9438 ///   }
9439 ///   // Delete the array section.
9440 ///   if (size > 1 && maptype.IsDelete)
9441 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9442 ///                                 size*sizeof(Ty), clearToFromMember(type));
9443 /// }
9444 /// \endcode
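/// For reference, a declaration such as (an illustrative source-level
/// example, not taken from a test)
/// \code
/// #pragma omp declare mapper(id : struct S s) map(s.len, s.data[0:s.len])
/// \endcode
/// causes a mapper function following the pattern above to be emitted for
/// 'struct S' under a name of the form '.omp_mapper.<mangled S>.id'.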
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
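  // (The exact unsigned division below encodes the assumption that the
  // runtime always passes a byte size that is a whole multiple of the
  // element size.)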
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the mapper's declared variable to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
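  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position of the map-type flags so that it can simply be added to each
  // member's map type below.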
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
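    // For example, if a member is declared 'map(tofrom: ...)' in the mapper
    // but the mapper is invoked from a 'map(to: ...)' clause, MapType has only
    // OMP_MAP_TO set, so the OMP_MAP_FROM bit is cleared below and the member
    // decays to 'to' (row 'tofrom', column 'to' in the table above).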
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc &&
             "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true and \a MapType indicates not to delete this array, array
/// initialization code is generated. If \a IsInit is false and \a MapType
/// indicates to delete this array, array deletion code is generated.
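/// The emitted guards are roughly equivalent to the following pseudo-code, an
/// illustrative sketch of the conditions constructed below rather than the
/// exact IR:
/// \code
/// // IsInit == true:
/// if ((size > 1 || (base != begin && (type & PTR_AND_OBJ))) &&
///     !(type & DELETE))
///   __tgt_push_mapper_component(handle, base, begin, size * sizeof(Ty),
///                               (type & ~(TO | FROM)) | IMPLICIT, name);
/// // IsInit == false:
/// if (size > 1 && (type & DELETE))
///   __tgt_push_mapper_component(handle, base, begin, size * sizeof(Ty),
///                               (type & ~(TO | FROM)) | IMPLICIT, name);
/// \endcode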
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

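/// Emit a call computing the number of iterations of the teams distribute
/// loop associated with the target directive \p D: either \p D itself or a
/// distribute directive nested inside it. The trip count is produced by
/// \p SizeEmitter; a 64-bit zero constant is returned when no such loop or
/// trip count is available, which signals an unknown trip count to the
/// runtime.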
llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

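  // A 'depend', 'nowait', or 'in_reduction' clause requires the target region
  // to be wrapped in an (implicit) outer task, so that the runtime can honor
  // the dependences and the asynchronous/in-reduction semantics.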
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep it alive, and could therefore inline the
    // host function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit the device ID, if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Get the trip count for the target loop-based directive.
    llvm::Value *NumIterations =
        emitTargetNumIterationsCall(CGF, D, SizeEmitter);

    llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
    if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
      CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
      llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
          DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
      DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
    }

    llvm::Value *ZeroArray =
        llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3));

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait);

    llvm::Value *NumTeams3D =
        CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0});
    llvm::Value *NumThreads3D =
        CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0});

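    // The following arguments mirror, in order, the fields of the runtime's
    // kernel-argument structure: version, argument count, the base-pointer,
    // pointer, size, map-type, map-name, and mapper arrays, the loop trip
    // count, the flags word (currently just the nowait bit), the 3-D team and
    // thread counts, and the dynamic "cgroup" memory size. (A descriptive
    // summary; the OpenMPIRBuilder defines the authoritative layout.)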
    // Arguments for the target kernel.
    SmallVector<llvm::Value *> KernelArgs{
        CGF.Builder.getInt32(/* Version */ 2),
        PointerNum,
        InputInfo.BasePointersArray.getPointer(),
        InputInfo.PointersArray.getPointer(),
        InputInfo.SizesArray.getPointer(),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.getPointer(),
        NumIterations,
        Flags,
        NumTeams3D,
        NumThreads3D,
        DynCGroupMem,
    };

    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target_kernel().
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    // Check the error code and execute the host version if required.
    CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel(
        CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads,
        OutlinedFnID, KernelArgs));

    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
            OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
            OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captured because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID, it means that we need to support
  // offloading; otherwise, we just execute on the host. We need to execute on
  // the host regardless of the 'if' clause condition if, e.g., the user does
  // not specify any target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    auto EntryInfo =
        getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

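/// Returns true if \p VD is assumed not to be emitted for the current
/// compilation direction because of its 'device_type' clause. For example (an
/// illustrative sketch),
/// \code
/// #pragma omp declare target device_type(nohost)
/// void only_on_device();
/// #pragma omp end declare target
/// \endcode
/// only_on_device() is not emitted when compiling for the host, and a
/// device_type(host) function is likewise not emitted when compiling for the
/// device.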
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  int64_t VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
       *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize =
          CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
      assert(VarSize != 0 && "Expected non-zero size of the variable");
    } else {
      VarSize = 0;
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temporary solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
              *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link', or 'to'/'enter' with "
           "unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    else
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize().getQuantity();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected a link clause, or a to/enter clause with unified "
             "memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected a target-based directive.");
}
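/// Record module-wide '#pragma omp requires' clauses. For example (an
/// illustrative source-level sketch),
/// \code
/// #pragma omp requires unified_shared_memory \
///                      atomic_default_mem_order(seq_cst)
/// \endcode
/// sets HasRequiresUnifiedSharedMemory and makes atomic constructs without an
/// explicit memory-order clause default to sequentially consistent ordering.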
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

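/// Check whether \p VD carries an 'omp allocate' attribute naming a
/// predefined allocator, e.g. (an illustrative example)
/// \code
/// int v;
/// #pragma omp allocate(v) allocator(omp_const_mem_alloc)
/// \endcode
/// and, if so, report in \p AS the language address space the variable should
/// be allocated in.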
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fall back to the default memory space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected a predefined allocator for variables with "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10547 
10548 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10549   // If we don't have entries or if we are emitting code for the device, we
10550   // don't need to do anything.
10551   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10552       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10553       (OffloadEntriesInfoManager.empty() &&
10554        !HasEmittedDeclareTargetRegion &&
10555        !HasEmittedTargetRegion))
10556     return nullptr;
10557 
10558   // Create and register the function that handles the requires directives.
10559   ASTContext &C = CGM.getContext();
10560 
10561   llvm::Function *RequiresRegFn;
10562   {
10563     CodeGenFunction CGF(CGM);
10564     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10565     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10566     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10567     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10568     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10569     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10570     // TODO: check for other requires clauses.
10571     // The requires directive takes effect only when a target region is
10572     // present in the compilation unit. Otherwise it is ignored and not
10573     // passed to the runtime. This avoids the runtime from throwing an error
10574     // for mismatching requires clauses across compilation units that don't
10575     // contain at least 1 target region.
10576     assert((HasEmittedTargetRegion ||
10577             HasEmittedDeclareTargetRegion ||
10578             !OffloadEntriesInfoManager.empty()) &&
10579            "Target or declare target region expected.");
10580     if (HasRequiresUnifiedSharedMemory)
10581       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10582     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10583                             CGM.getModule(), OMPRTL___tgt_register_requires),
10584                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10585     CGF.FinishFunction();
10586   }
10587   return RequiresRegFn;
10588 }
10589 
10590 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10591                                     const OMPExecutableDirective &D,
10592                                     SourceLocation Loc,
10593                                     llvm::Function *OutlinedFn,
10594                                     ArrayRef<llvm::Value *> CapturedVars) {
10595   if (!CGF.HaveInsertPoint())
10596     return;
10597 
10598   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10599   CodeGenFunction::RunCleanupsScope Scope(CGF);
10600 
10601   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10602   llvm::Value *Args[] = {
10603       RTLoc,
10604       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10605       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10606   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10607   RealArgs.append(std::begin(Args), std::end(Args));
10608   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10609 
10610   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10611       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10612   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10613 }
10614 
10615 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10616                                          const Expr *NumTeams,
10617                                          const Expr *ThreadLimit,
10618                                          SourceLocation Loc) {
10619   if (!CGF.HaveInsertPoint())
10620     return;
10621 
10622   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10623 
10624   llvm::Value *NumTeamsVal =
10625       NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
10628           : CGF.Builder.getInt32(0);
10629 
10630   llvm::Value *ThreadLimitVal =
10631       ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
10634           : CGF.Builder.getInt32(0);
10635 
  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10637   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10638                                      ThreadLimitVal};
10639   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10640                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10641                       PushNumTeamsArgs);
10642 }
10643 
10644 void CGOpenMPRuntime::emitTargetDataCalls(
10645     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10646     const Expr *Device, const RegionCodeGenTy &CodeGen,
10647     CGOpenMPRuntime::TargetDataInfo &Info) {
10648   if (!CGF.HaveInsertPoint())
10649     return;
10650 
10651   // Action used to replace the default codegen action and turn privatization
10652   // off.
10653   PrePostActionTy NoPrivAction;
10654 
10655   // Generate the code for the opening of the data environment. Capture all the
10656   // arguments of the runtime call by reference because they are used in the
10657   // closing of the region.
10658   auto &&BeginThenGen = [this, &D, Device, &Info,
10659                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10660     // Fill up the arrays with all the mapped variables.
10661     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10662 
10663     // Get map clause information.
10664     MappableExprsHandler MEHandler(D, CGF);
10665     MEHandler.generateAllInfo(CombinedInfo);
10666 
10667     // Fill up the arrays and create the arguments.
10668     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10669                          /*IsNonContiguous=*/true);
10670 
10671     llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10672     bool EmitDebug =
10673         CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
10674     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10675                                             EmitDebug);
10676 
10677     // Emit device ID if any.
10678     llvm::Value *DeviceID = nullptr;
10679     if (Device) {
10680       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10681                                            CGF.Int64Ty, /*isSigned=*/true);
10682     } else {
10683       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10684     }
10685 
10686     // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
10690     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10691 
10692     llvm::Value *OffloadingArgs[] = {RTLoc,
10693                                      DeviceID,
10694                                      PointerNum,
10695                                      RTArgs.BasePointersArray,
10696                                      RTArgs.PointersArray,
10697                                      RTArgs.SizesArray,
10698                                      RTArgs.MapTypesArray,
10699                                      RTArgs.MapNamesArray,
10700                                      RTArgs.MappersArray};
10701     CGF.EmitRuntimeCall(
10702         OMPBuilder.getOrCreateRuntimeFunction(
10703             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10704         OffloadingArgs);
10705 
10706     // If device pointer privatization is required, emit the body of the region
10707     // here. It will have to be duplicated: with and without privatization.
10708     if (!Info.CaptureDeviceAddrMap.empty())
10709       CodeGen(CGF);
10710   };
10711 
10712   // Generate code for the closing of the data region.
10713   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
10714                                                 PrePostActionTy &) {
10715     assert(Info.isValid() && "Invalid data environment closing arguments.");
10716 
10717     llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10718     bool EmitDebug =
10719         CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
10720     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10721                                             EmitDebug,
10722                                             /*ForEndCall=*/true);
10723 
10724     // Emit device ID if any.
10725     llvm::Value *DeviceID = nullptr;
10726     if (Device) {
10727       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10728                                            CGF.Int64Ty, /*isSigned=*/true);
10729     } else {
10730       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10731     }
10732 
10733     // Emit the number of elements in the offloading arrays.
10734     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10735 
10736     // Source location for the ident struct
10737     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10738 
10739     llvm::Value *OffloadingArgs[] = {RTLoc,
10740                                      DeviceID,
10741                                      PointerNum,
10742                                      RTArgs.BasePointersArray,
10743                                      RTArgs.PointersArray,
10744                                      RTArgs.SizesArray,
10745                                      RTArgs.MapTypesArray,
10746                                      RTArgs.MapNamesArray,
10747                                      RTArgs.MappersArray};
10748     CGF.EmitRuntimeCall(
10749         OMPBuilder.getOrCreateRuntimeFunction(
10750             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10751         OffloadingArgs);
10752   };
10753 
10754   // If we need device pointer privatization, we need to emit the body of the
10755   // region with no privatization in the 'else' branch of the conditional.
10756   // Otherwise, we don't have to do anything.
10757   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10758                                                          PrePostActionTy &) {
10759     if (!Info.CaptureDeviceAddrMap.empty()) {
10760       CodeGen.setAction(NoPrivAction);
10761       CodeGen(CGF);
10762     }
10763   };
10764 
10765   // We don't have to do anything to close the region if the if clause evaluates
10766   // to false.
10767   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10768 
10769   if (IfCond) {
10770     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10771   } else {
10772     RegionCodeGenTy RCG(BeginThenGen);
10773     RCG(CGF);
10774   }
10775 
10776   // If we don't require privatization of device pointers, we emit the body in
10777   // between the runtime calls. This avoids duplicating the body code.
10778   if (Info.CaptureDeviceAddrMap.empty()) {
10779     CodeGen.setAction(NoPrivAction);
10780     CodeGen(CGF);
10781   }
10782 
10783   if (IfCond) {
10784     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10785   } else {
10786     RegionCodeGenTy RCG(EndThenGen);
10787     RCG(CGF);
10788   }
10789 }
10790 
10791 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10792     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10793     const Expr *Device) {
10794   if (!CGF.HaveInsertPoint())
10795     return;
10796 
10797   assert((isa<OMPTargetEnterDataDirective>(D) ||
10798           isa<OMPTargetExitDataDirective>(D) ||
10799           isa<OMPTargetUpdateDirective>(D)) &&
10800          "Expecting either target enter, exit data, or update directives.");
10801 
10802   CodeGenFunction::OMPTargetDataInfo InputInfo;
10803   llvm::Value *MapTypesArray = nullptr;
10804   llvm::Value *MapNamesArray = nullptr;
10805   // Generate the code for the opening of the data environment.
10806   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10807                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10808     // Emit device ID if any.
10809     llvm::Value *DeviceID = nullptr;
10810     if (Device) {
10811       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10812                                            CGF.Int64Ty, /*isSigned=*/true);
10813     } else {
10814       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10815     }
10816 
10817     // Emit the number of elements in the offloading arrays.
10818     llvm::Constant *PointerNum =
10819         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10820 
10821     // Source location for the ident struct
10822     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10823 
10824     llvm::Value *OffloadingArgs[] = {RTLoc,
10825                                      DeviceID,
10826                                      PointerNum,
10827                                      InputInfo.BasePointersArray.getPointer(),
10828                                      InputInfo.PointersArray.getPointer(),
10829                                      InputInfo.SizesArray.getPointer(),
10830                                      MapTypesArray,
10831                                      MapNamesArray,
10832                                      InputInfo.MappersArray.getPointer()};
10833 
10834     // Select the right runtime function call for each standalone
10835     // directive.
10836     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10837     RuntimeFunction RTLFn;
10838     switch (D.getDirectiveKind()) {
10839     case OMPD_target_enter_data:
10840       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10841                         : OMPRTL___tgt_target_data_begin_mapper;
10842       break;
10843     case OMPD_target_exit_data:
10844       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10845                         : OMPRTL___tgt_target_data_end_mapper;
10846       break;
10847     case OMPD_target_update:
10848       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10849                         : OMPRTL___tgt_target_data_update_mapper;
10850       break;
10851     case OMPD_parallel:
10852     case OMPD_for:
10853     case OMPD_parallel_for:
10854     case OMPD_parallel_master:
10855     case OMPD_parallel_sections:
10856     case OMPD_for_simd:
10857     case OMPD_parallel_for_simd:
10858     case OMPD_cancel:
10859     case OMPD_cancellation_point:
10860     case OMPD_ordered:
10861     case OMPD_threadprivate:
10862     case OMPD_allocate:
10863     case OMPD_task:
10864     case OMPD_simd:
10865     case OMPD_tile:
10866     case OMPD_unroll:
10867     case OMPD_sections:
10868     case OMPD_section:
10869     case OMPD_single:
10870     case OMPD_master:
10871     case OMPD_critical:
10872     case OMPD_taskyield:
10873     case OMPD_barrier:
10874     case OMPD_taskwait:
10875     case OMPD_taskgroup:
10876     case OMPD_atomic:
10877     case OMPD_flush:
10878     case OMPD_depobj:
10879     case OMPD_scan:
10880     case OMPD_teams:
10881     case OMPD_target_data:
10882     case OMPD_distribute:
10883     case OMPD_distribute_simd:
10884     case OMPD_distribute_parallel_for:
10885     case OMPD_distribute_parallel_for_simd:
10886     case OMPD_teams_distribute:
10887     case OMPD_teams_distribute_simd:
10888     case OMPD_teams_distribute_parallel_for:
10889     case OMPD_teams_distribute_parallel_for_simd:
10890     case OMPD_declare_simd:
10891     case OMPD_declare_variant:
10892     case OMPD_begin_declare_variant:
10893     case OMPD_end_declare_variant:
10894     case OMPD_declare_target:
10895     case OMPD_end_declare_target:
10896     case OMPD_declare_reduction:
10897     case OMPD_declare_mapper:
10898     case OMPD_taskloop:
10899     case OMPD_taskloop_simd:
10900     case OMPD_master_taskloop:
10901     case OMPD_master_taskloop_simd:
10902     case OMPD_parallel_master_taskloop:
10903     case OMPD_parallel_master_taskloop_simd:
10904     case OMPD_target:
10905     case OMPD_target_simd:
10906     case OMPD_target_teams_distribute:
10907     case OMPD_target_teams_distribute_simd:
10908     case OMPD_target_teams_distribute_parallel_for:
10909     case OMPD_target_teams_distribute_parallel_for_simd:
10910     case OMPD_target_teams:
10911     case OMPD_target_parallel:
10912     case OMPD_target_parallel_for:
10913     case OMPD_target_parallel_for_simd:
10914     case OMPD_requires:
10915     case OMPD_metadirective:
10916     case OMPD_unknown:
10917     default:
10918       llvm_unreachable("Unexpected standalone target data directive.");
10919       break;
10920     }
10921     CGF.EmitRuntimeCall(
10922         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10923         OffloadingArgs);
10924   };
10925 
10926   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10927                           &MapNamesArray](CodeGenFunction &CGF,
10928                                           PrePostActionTy &) {
10929     // Fill up the arrays with all the mapped variables.
10930     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10931 
10932     // Get map clause information.
10933     MappableExprsHandler MEHandler(D, CGF);
10934     MEHandler.generateAllInfo(CombinedInfo);
10935 
10936     CGOpenMPRuntime::TargetDataInfo Info;
10937     // Fill up the arrays and create the arguments.
10938     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10939                          /*IsNonContiguous=*/true);
10940     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10941                              D.hasClausesOfKind<OMPNowaitClause>();
10942     bool EmitDebug =
10943         CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
10944     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10945                                             EmitDebug,
10946                                             /*ForEndCall=*/false);
10947     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10948     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10949                                           CGF.VoidPtrTy, CGM.getPointerAlign());
10950     InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10951                                       CGM.getPointerAlign());
10952     InputInfo.SizesArray =
10953         Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10954     InputInfo.MappersArray =
10955         Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10956     MapTypesArray = Info.RTArgs.MapTypesArray;
10957     MapNamesArray = Info.RTArgs.MapNamesArray;
10958     if (RequiresOuterTask)
10959       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10960     else
10961       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10962   };
10963 
10964   if (IfCond) {
10965     emitIfClause(CGF, IfCond, TargetThenGen,
10966                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10967   } else {
10968     RegionCodeGenTy ThenRCG(TargetThenGen);
10969     ThenRCG(CGF);
10970   }
10971 }
10972 
10973 namespace {
/// Kind of parameter in a function with 'declare simd' directive.
10975 enum ParamKindTy {
10976   Linear,
10977   LinearRef,
10978   LinearUVal,
10979   LinearVal,
10980   Uniform,
10981   Vector,
10982 };
10983 /// Attribute set of the parameter.
10984 struct ParamAttrTy {
10985   ParamKindTy Kind = Vector;
10986   llvm::APSInt StrideOrArg;
10987   llvm::APSInt Alignment;
10988   bool HasVarStride = false;
10989 };
10990 } // namespace
10991 
10992 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10993                                 ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers
  // and the Stack Frame of the original AMD64 ABI document.
11013   QualType RetType = FD->getReturnType();
11014   if (RetType.isNull())
11015     return 0;
11016   ASTContext &C = FD->getASTContext();
11017   QualType CDT;
  if (!RetType->isVoidType()) {
11019     CDT = RetType;
11020   } else {
11021     unsigned Offset = 0;
11022     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11023       if (ParamAttrs[Offset].Kind == Vector)
11024         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11025       ++Offset;
11026     }
11027     if (CDT.isNull()) {
11028       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11029         if (ParamAttrs[I + Offset].Kind == Vector) {
11030           CDT = FD->getParamDecl(I)->getType();
11031           break;
11032         }
11033       }
11034     }
11035   }
11036   if (CDT.isNull())
11037     CDT = C.IntTy;
11038   CDT = CDT->getCanonicalTypeUnqualified();
11039   if (CDT->isRecordType() || CDT->isUnionType())
11040     CDT = C.IntTy;
11041   return C.getTypeSize(CDT);
11042 }
11043 
/// Mangle the parameter part of the vector function name according to
/// the parameters' OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI (2021Q1).
11047 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11048   SmallString<256> Buffer;
11049   llvm::raw_svector_ostream Out(Buffer);
11050   for (const auto &ParamAttr : ParamAttrs) {
11051     switch (ParamAttr.Kind) {
11052     case Linear:
11053       Out << 'l';
11054       break;
11055     case LinearRef:
11056       Out << 'R';
11057       break;
11058     case LinearUVal:
11059       Out << 'U';
11060       break;
11061     case LinearVal:
11062       Out << 'L';
11063       break;
11064     case Uniform:
11065       Out << 'u';
11066       break;
11067     case Vector:
11068       Out << 'v';
11069       break;
11070     }
11071     if (ParamAttr.HasVarStride)
11072       Out << "s" << ParamAttr.StrideOrArg;
11073     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11074              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11075       // Don't print the step value if it is not present or if it is
11076       // equal to 1.
11077       if (ParamAttr.StrideOrArg < 0)
11078         Out << 'n' << -ParamAttr.StrideOrArg;
11079       else if (ParamAttr.StrideOrArg != 1)
11080         Out << ParamAttr.StrideOrArg;
11081     }
11082 
11083     if (!!ParamAttr.Alignment)
11084       Out << 'a' << ParamAttr.Alignment;
11085   }
11086 
11087   return std::string(Out.str());
11088 }
11089 
11090 static void
11091 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11092                            const llvm::APSInt &VLENVal,
11093                            ArrayRef<ParamAttrTy> ParamAttrs,
11094                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11095   struct ISADataTy {
11096     char ISA;
11097     unsigned VecRegSize;
11098   };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
11113   llvm::SmallVector<char, 2> Masked;
11114   switch (State) {
11115   case OMPDeclareSimdDeclAttr::BS_Undefined:
11116     Masked.push_back('N');
11117     Masked.push_back('M');
11118     break;
11119   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11120     Masked.push_back('N');
11121     break;
11122   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11123     Masked.push_back('M');
11124     break;
11125   }
11126   for (char Mask : Masked) {
11127     for (const ISADataTy &Data : ISAData) {
11128       SmallString<256> Buffer;
11129       llvm::raw_svector_ostream Out(Buffer);
11130       Out << "_ZGV" << Data.ISA << Mask;
11131       if (!VLENVal) {
11132         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11133         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11134         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11135       } else {
11136         Out << VLENVal;
11137       }
11138       Out << mangleVectorParameters(ParamAttrs);
11139       Out << '_' << Fn->getName();
11140       Fn->addFnAttr(Out.str());
11141     }
11142   }
11143 }
11144 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11150 
11151 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11152 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11153   QT = QT.getCanonicalType();
11154 
11155   if (QT->isVoidType())
11156     return false;
11157 
11158   if (Kind == ParamKindTy::Uniform)
11159     return false;
11160 
  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;
11163 
11164   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11165       !QT->isReferenceType())
11166     return false;
11167 
11168   return true;
11169 }
11170 
11171 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11172 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11173   QT = QT.getCanonicalType();
11174   unsigned Size = C.getTypeSize(QT);
11175 
  // Only scalars and complex types at most 16 bytes wide set PBV to true.
11177   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11178     return false;
11179 
11180   if (QT->isFloatingType())
11181     return true;
11182 
11183   if (QT->isIntegerType())
11184     return true;
11185 
11186   if (QT->isPointerType())
11187     return true;
11188 
11189   // TODO: Add support for complex types (section 3.1.2, item 2).
11190 
11191   return false;
11192 }
11193 
11194 /// Computes the lane size (LS) of a return type or of an input parameter,
11195 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11196 /// TODO: Add support for references, section 3.2.1, item 1.
11197 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11198   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11199     QualType PTy = QT.getCanonicalType()->getPointeeType();
11200     if (getAArch64PBV(PTy, C))
11201       return C.getTypeSize(PTy);
11202   }
11203   if (getAArch64PBV(QT, C))
11204     return C.getTypeSize(QT);
11205 
11206   return C.getTypeSize(C.getUIntPtrType());
11207 }
11208 
11209 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11210 // signature of the scalar function, as defined in 3.2.2 of the
11211 // AAVFABI.
11212 static std::tuple<unsigned, unsigned, bool>
11213 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11214   QualType RetType = FD->getReturnType().getCanonicalType();
11215 
11216   ASTContext &C = FD->getASTContext();
11217 
11218   bool OutputBecomesInput = false;
11219 
11220   llvm::SmallVector<unsigned, 8> Sizes;
11221   if (!RetType->isVoidType()) {
11222     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11223     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11224       OutputBecomesInput = true;
11225   }
11226   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11227     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11228     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11229   }
11230 
11231   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11232   // The LS of a function parameter / return value can only be a power
11233   // of 2, starting from 8 bits, up to 128.
11234   assert(llvm::all_of(Sizes,
11235                       [](unsigned Size) {
11236                         return Size == 8 || Size == 16 || Size == 32 ||
11237                                Size == 64 || Size == 128;
11238                       }) &&
11239          "Invalid size");
11240 
11241   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11242                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11243                          OutputBecomesInput);
11244 }
11245 
11246 // Function used to add the attribute. The parameter `VLEN` is
11247 // templated to allow the use of "x" when targeting scalable functions
11248 // for SVE.
11249 template <typename T>
11250 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11251                                  char ISA, StringRef ParSeq,
11252                                  StringRef MangledName, bool OutputBecomesInput,
11253                                  llvm::Function *Fn) {
11254   SmallString<256> Buffer;
11255   llvm::raw_svector_ostream Out(Buffer);
11256   Out << Prefix << ISA << LMask << VLEN;
11257   if (OutputBecomesInput)
11258     Out << "v";
11259   Out << ParSeq << "_" << MangledName;
11260   Fn->addFnAttr(Out.str());
11261 }
11262 
11263 // Helper function to generate the Advanced SIMD names depending on
11264 // the value of the NDS when simdlen is not present.
11265 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11266                                       StringRef Prefix, char ISA,
11267                                       StringRef ParSeq, StringRef MangledName,
11268                                       bool OutputBecomesInput,
11269                                       llvm::Function *Fn) {
11270   switch (NDS) {
11271   case 8:
11272     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11273                          OutputBecomesInput, Fn);
11274     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11275                          OutputBecomesInput, Fn);
11276     break;
11277   case 16:
11278     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11279                          OutputBecomesInput, Fn);
11280     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11281                          OutputBecomesInput, Fn);
11282     break;
11283   case 32:
11284     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11285                          OutputBecomesInput, Fn);
11286     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11287                          OutputBecomesInput, Fn);
11288     break;
11289   case 64:
11290   case 128:
11291     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11292                          OutputBecomesInput, Fn);
11293     break;
11294   default:
11295     llvm_unreachable("Scalar type is too wide.");
11296   }
11297 }
11298 
11299 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11300 static void emitAArch64DeclareSimdFunction(
11301     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11302     ArrayRef<ParamAttrTy> ParamAttrs,
11303     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11304     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11305 
11306   // Get basic data for building the vector signature.
11307   const auto Data = getNDSWDS(FD, ParamAttrs);
11308   const unsigned NDS = std::get<0>(Data);
11309   const unsigned WDS = std::get<1>(Data);
11310   const bool OutputBecomesInput = std::get<2>(Data);
11311 
11312   // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
11314   if (UserVLEN == 1) {
11315     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11316         DiagnosticsEngine::Warning,
11317         "The clause simdlen(1) has no effect when targeting aarch64.");
11318     CGM.getDiags().Report(SLoc, DiagID);
11319     return;
11320   }
11321 
11322   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11323   // Advanced SIMD output.
11324   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11325     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11326         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11327                                     "power of 2 when targeting Advanced SIMD.");
11328     CGM.getDiags().Report(SLoc, DiagID);
11329     return;
11330   }
11331 
  // 3. Section 3.4.1. The SVE fixed length must obey the architectural
  // limits.
11334   if (ISA == 's' && UserVLEN != 0) {
11335     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11336       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11337           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11338                                       "lanes in the architectural constraints "
11339                                       "for SVE (min is 128-bit, max is "
11340                                       "2048-bit, by steps of 128-bit)");
11341       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11342       return;
11343     }
11344   }
11345 
11346   // Sort out parameter sequence.
11347   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11348   StringRef Prefix = "_ZGV";
11349   // Generate simdlen from user input (if any).
11350   if (UserVLEN) {
11351     if (ISA == 's') {
11352       // SVE generates only a masked function.
11353       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11354                            OutputBecomesInput, Fn);
11355     } else {
11356       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11357       // Advanced SIMD generates one or two functions, depending on
11358       // the `[not]inbranch` clause.
11359       switch (State) {
11360       case OMPDeclareSimdDeclAttr::BS_Undefined:
11361         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11362                              OutputBecomesInput, Fn);
11363         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11364                              OutputBecomesInput, Fn);
11365         break;
11366       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11367         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11368                              OutputBecomesInput, Fn);
11369         break;
11370       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11371         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11372                              OutputBecomesInput, Fn);
11373         break;
11374       }
11375     }
11376   } else {
11377     // If no user simdlen is provided, follow the AAVFABI rules for
11378     // generating the vector length.
11379     if (ISA == 's') {
11380       // SVE, section 3.4.1, item 1.
11381       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11382                            OutputBecomesInput, Fn);
11383     } else {
11384       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11385       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11386       // two vector names depending on the use of the clause
11387       // `[not]inbranch`.
11388       switch (State) {
11389       case OMPDeclareSimdDeclAttr::BS_Undefined:
11390         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11391                                   OutputBecomesInput, Fn);
11392         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11393                                   OutputBecomesInput, Fn);
11394         break;
11395       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11396         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11397                                   OutputBecomesInput, Fn);
11398         break;
11399       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11400         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11401                                   OutputBecomesInput, Fn);
11402         break;
11403       }
11404     }
11405   }
11406 }
11407 
11408 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11409                                               llvm::Function *Fn) {
11410   ASTContext &C = CGM.getContext();
11411   FD = FD->getMostRecentDecl();
11412   while (FD) {
11413     // Map params to their positions in function decl.
11414     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11415     if (isa<CXXMethodDecl>(FD))
11416       ParamPositions.try_emplace(FD, 0);
11417     unsigned ParamPos = ParamPositions.size();
11418     for (const ParmVarDecl *P : FD->parameters()) {
11419       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11420       ++ParamPos;
11421     }
11422     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11423       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11424       // Mark uniform parameters.
11425       for (const Expr *E : Attr->uniforms()) {
11426         E = E->IgnoreParenImpCasts();
11427         unsigned Pos;
11428         if (isa<CXXThisExpr>(E)) {
11429           Pos = ParamPositions[FD];
11430         } else {
11431           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11432                                 ->getCanonicalDecl();
11433           auto It = ParamPositions.find(PVD);
11434           assert(It != ParamPositions.end() && "Function parameter not found");
11435           Pos = It->second;
11436         }
11437         ParamAttrs[Pos].Kind = Uniform;
11438       }
11439       // Get alignment info.
11440       auto *NI = Attr->alignments_begin();
11441       for (const Expr *E : Attr->aligneds()) {
11442         E = E->IgnoreParenImpCasts();
11443         unsigned Pos;
11444         QualType ParmTy;
11445         if (isa<CXXThisExpr>(E)) {
11446           Pos = ParamPositions[FD];
11447           ParmTy = E->getType();
11448         } else {
11449           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11450                                 ->getCanonicalDecl();
11451           auto It = ParamPositions.find(PVD);
11452           assert(It != ParamPositions.end() && "Function parameter not found");
11453           Pos = It->second;
11454           ParmTy = PVD->getType();
11455         }
11456         ParamAttrs[Pos].Alignment =
11457             (*NI)
11458                 ? (*NI)->EvaluateKnownConstInt(C)
11459                 : llvm::APSInt::getUnsigned(
11460                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11461                           .getQuantity());
11462         ++NI;
11463       }
11464       // Mark linear parameters.
11465       auto *SI = Attr->steps_begin();
11466       auto *MI = Attr->modifiers_begin();
11467       for (const Expr *E : Attr->linears()) {
11468         E = E->IgnoreParenImpCasts();
11469         unsigned Pos;
11470         bool IsReferenceType = false;
11471         // Rescaling factor needed to compute the linear parameter
11472         // value in the mangled name.
11473         unsigned PtrRescalingFactor = 1;
11474         if (isa<CXXThisExpr>(E)) {
11475           Pos = ParamPositions[FD];
11476           auto *P = cast<PointerType>(E->getType());
11477           PtrRescalingFactor = CGM.getContext()
11478                                    .getTypeSizeInChars(P->getPointeeType())
11479                                    .getQuantity();
11480         } else {
11481           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11482                                 ->getCanonicalDecl();
11483           auto It = ParamPositions.find(PVD);
11484           assert(It != ParamPositions.end() && "Function parameter not found");
11485           Pos = It->second;
11486           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11487             PtrRescalingFactor = CGM.getContext()
11488                                      .getTypeSizeInChars(P->getPointeeType())
11489                                      .getQuantity();
11490           else if (PVD->getType()->isReferenceType()) {
11491             IsReferenceType = true;
11492             PtrRescalingFactor =
11493                 CGM.getContext()
11494                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11495                     .getQuantity();
11496           }
11497         }
11498         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11499         if (*MI == OMPC_LINEAR_ref)
11500           ParamAttr.Kind = LinearRef;
11501         else if (*MI == OMPC_LINEAR_uval)
11502           ParamAttr.Kind = LinearUVal;
11503         else if (IsReferenceType)
11504           ParamAttr.Kind = LinearVal;
11505         else
11506           ParamAttr.Kind = Linear;
        // Assume a stride of 1 for `linear` without modifiers.
11508         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11509         if (*SI) {
11510           Expr::EvalResult Result;
11511           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11514               if (const auto *StridePVD =
11515                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11516                 ParamAttr.HasVarStride = true;
11517                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11518                 assert(It != ParamPositions.end() &&
11519                        "Function parameter not found");
11520                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11521               }
11522             }
11523           } else {
11524             ParamAttr.StrideOrArg = Result.Val.getInt();
11525           }
11526         }
11527         // If we are using a linear clause on a pointer, we need to
11528         // rescale the value of linear_step with the byte size of the
11529         // pointee type.
11530         if (!ParamAttr.HasVarStride &&
11531             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11532           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11533         ++SI;
11534         ++MI;
11535       }
11536       llvm::APSInt VLENVal;
11537       SourceLocation ExprLoc;
11538       const Expr *VLENExpr = Attr->getSimdlen();
11539       if (VLENExpr) {
11540         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11541         ExprLoc = VLENExpr->getExprLoc();
11542       }
11543       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11544       if (CGM.getTriple().isX86()) {
11545         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11546       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11547         unsigned VLEN = VLENVal.getExtValue();
11548         StringRef MangledName = Fn->getName();
11549         if (CGM.getTarget().hasFeature("sve"))
11550           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11551                                          MangledName, 's', 128, Fn, ExprLoc);
11552         else if (CGM.getTarget().hasFeature("neon"))
11553           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11554                                          MangledName, 'n', 128, Fn, ExprLoc);
11555       }
11556     }
11557     FD = FD->getPreviousDecl();
11558   }
11559 }
11560 
11561 namespace {
11562 /// Cleanup action for doacross support.
11563 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11564 public:
11565   static const int DoacrossFinArgs = 2;
11566 
11567 private:
11568   llvm::FunctionCallee RTLFn;
11569   llvm::Value *Args[DoacrossFinArgs];
11570 
11571 public:
11572   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11573                     ArrayRef<llvm::Value *> CallArgs)
11574       : RTLFn(RTLFn) {
11575     assert(CallArgs.size() == DoacrossFinArgs);
11576     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11577   }
11578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11579     if (!CGF.HaveInsertPoint())
11580       return;
11581     CGF.EmitRuntimeCall(RTLFn, Args);
11582   }
11583 };
11584 } // namespace
11585 
11586 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11587                                        const OMPLoopDirective &D,
11588                                        ArrayRef<Expr *> NumIterations) {
11589   if (!CGF.HaveInsertPoint())
11590     return;
11591 
11592   ASTContext &C = CGM.getContext();
11593   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11594   RecordDecl *RD;
11595   if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info cast to kmp_int64
11597     //  kmp_int64 lo; // lower
11598     //  kmp_int64 up; // upper
11599     //  kmp_int64 st; // stride
11600     // };
11601     RD = C.buildImplicitRecord("kmp_dim");
11602     RD->startDefinition();
11603     addFieldToRecordDecl(C, RD, Int64Ty);
11604     addFieldToRecordDecl(C, RD, Int64Ty);
11605     addFieldToRecordDecl(C, RD, Int64Ty);
11606     RD->completeDefinition();
11607     KmpDimTy = C.getRecordType(RD);
11608   } else {
11609     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11610   }
11611   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11612   QualType ArrayTy =
11613       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11614 
11615   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11616   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11617   enum { LowerFD = 0, UpperFD, StrideFD };
11618   // Fill dims with data.
11619   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11620     LValue DimsLVal = CGF.MakeAddrLValue(
11621         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11622     // dims.upper = num_iterations;
11623     LValue UpperLVal = CGF.EmitLValueForField(
11624         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11625     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11626         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11627         Int64Ty, NumIterations[I]->getExprLoc());
11628     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11629     // dims.stride = 1;
11630     LValue StrideLVal = CGF.EmitLValueForField(
11631         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11632     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11633                           StrideLVal);
11634   }
11635 
11636   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11637   // kmp_int32 num_dims, struct kmp_dim * dims);
11638   llvm::Value *Args[] = {
11639       emitUpdateLocation(CGF, D.getBeginLoc()),
11640       getThreadID(CGF, D.getBeginLoc()),
11641       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11642       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11643           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11644           CGM.VoidPtrTy)};
11645 
11646   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11647       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11648   CGF.EmitRuntimeCall(RTLFn, Args);
11649   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11650       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11651   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11652       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11653   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11654                                              llvm::ArrayRef(FiniArgs));
11655 }
11656 
11657 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11658                                           const OMPDependClause *C) {
11659   QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11661   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11662   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11663       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11664   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11665   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11666     const Expr *CounterVal = C->getLoopData(I);
11667     assert(CounterVal);
11668     llvm::Value *CntVal = CGF.EmitScalarConversion(
11669         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11670         CounterVal->getExprLoc());
11671     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11672                           /*Volatile=*/false, Int64Ty);
11673   }
11674   llvm::Value *Args[] = {
11675       emitUpdateLocation(CGF, C->getBeginLoc()),
11676       getThreadID(CGF, C->getBeginLoc()),
11677       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11678   llvm::FunctionCallee RTLFn;
11679   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11680     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11681                                                   OMPRTL___kmpc_doacross_post);
11682   } else {
11683     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11684     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11685                                                   OMPRTL___kmpc_doacross_wait);
11686   }
11687   CGF.EmitRuntimeCall(RTLFn, Args);
11688 }
11689 
11690 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11691                                llvm::FunctionCallee Callee,
11692                                ArrayRef<llvm::Value *> Args) const {
11693   assert(Loc.isValid() && "Outlined function call location must be valid.");
11694   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11695 
11696   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11697     if (Fn->doesNotThrow()) {
11698       CGF.EmitNounwindRuntimeCall(Fn, Args);
11699       return;
11700     }
11701   }
11702   CGF.EmitRuntimeCall(Callee, Args);
11703 }
11704 
11705 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11706     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11707     ArrayRef<llvm::Value *> Args) const {
11708   emitCall(CGF, Loc, OutlinedFn, Args);
11709 }
11710 
11711 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11712   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11713     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11714       HasEmittedDeclareTargetRegion = true;
11715 }
11716 
11717 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11718                                              const VarDecl *NativeParam,
11719                                              const VarDecl *TargetParam) const {
11720   return CGF.GetAddrOfLocalVar(NativeParam);
11721 }
11722 
/// Return the allocator value from the expression, or return a null allocator
/// (the default when no allocator is specified).
11725 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11726                                     const Expr *Allocator) {
11727   llvm::Value *AllocVal;
11728   if (Allocator) {
11729     AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert it to pointer type, if required.
11732     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11733                                         CGF.getContext().VoidPtrTy,
11734                                         Allocator->getExprLoc());
11735   } else {
    // If no allocator is specified, it defaults to the null allocator.
11737     AllocVal = llvm::Constant::getNullValue(
11738         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11739   }
11740   return AllocVal;
11741 }
11742 
11743 /// Return the alignment from an allocate directive if present.
11744 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11745   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11746 
11747   if (!AllocateAlignment)
11748     return nullptr;
11749 
11750   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11751 }
11752 
11753 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11754                                                    const VarDecl *VD) {
11755   if (!VD)
11756     return Address::invalid();
11757   Address UntiedAddr = Address::invalid();
11758   Address UntiedRealAddr = Address::invalid();
11759   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11760   if (It != FunctionToUntiedTaskStackMap.end()) {
11761     const UntiedLocalVarsAddressesMap &UntiedData =
11762         UntiedLocalVarsStack[It->second];
11763     auto I = UntiedData.find(VD);
11764     if (I != UntiedData.end()) {
11765       UntiedAddr = I->second.first;
11766       UntiedRealAddr = I->second.second;
11767     }
11768   }
11769   const VarDecl *CVD = VD->getCanonicalDecl();
11770   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11771     // Use the default allocation.
11772     if (!isAllocatableDecl(VD))
11773       return UntiedAddr;
11774     llvm::Value *Size;
11775     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11776     if (CVD->getType()->isVariablyModifiedType()) {
11777       Size = CGF.getTypeSize(CVD->getType());
11778       // Align the size: ((size + align - 1) / align) * align
11779       Size = CGF.Builder.CreateNUWAdd(
11780           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11781       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11782       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11783     } else {
11784       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11785       Size = CGM.getSize(Sz.alignTo(Align));
11786     }
11787     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11788     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11789     const Expr *Allocator = AA->getAllocator();
11790     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11791     llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11792     SmallVector<llvm::Value *, 4> Args;
11793     Args.push_back(ThreadID);
11794     if (Alignment)
11795       Args.push_back(Alignment);
11796     Args.push_back(Size);
11797     Args.push_back(AllocVal);
11798     llvm::omp::RuntimeFunction FnID =
11799         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11800     llvm::Value *Addr = CGF.EmitRuntimeCall(
11801         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11802         getName({CVD->getName(), ".void.addr"}));
11803     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11804         CGM.getModule(), OMPRTL___kmpc_free);
11805     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11806     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11807         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11808     if (UntiedAddr.isValid())
11809       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11810 
11811     // Cleanup action for allocate support.
11812     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11813       llvm::FunctionCallee RTLFn;
11814       SourceLocation::UIntTy LocEncoding;
11815       Address Addr;
11816       const Expr *AllocExpr;
11817 
11818     public:
11819       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11820                            SourceLocation::UIntTy LocEncoding, Address Addr,
11821                            const Expr *AllocExpr)
11822           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11823             AllocExpr(AllocExpr) {}
11824       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11825         if (!CGF.HaveInsertPoint())
11826           return;
11827         llvm::Value *Args[3];
11828         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11829             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11830         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11831             Addr.getPointer(), CGF.VoidPtrTy);
11832         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11833         Args[2] = AllocVal;
11834         CGF.EmitRuntimeCall(RTLFn, Args);
11835       }
11836     };
11837     Address VDAddr =
11838         UntiedRealAddr.isValid()
11839             ? UntiedRealAddr
11840             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11841     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11842         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11843         VDAddr, Allocator);
11844     if (UntiedRealAddr.isValid())
11845       if (auto *Region =
11846               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11847         Region->emitUntiedSwitch(CGF);
11848     return VDAddr;
11849   }
11850   return UntiedAddr;
11851 }
11852 
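// Returns true if VD was registered as a local variable of an untied task in
// the current function (see UntiedTaskLocalDeclsRAII below).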
11853 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11854                                              const VarDecl *VD) const {
11855   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11856   if (It == FunctionToUntiedTaskStackMap.end())
11857     return false;
11858   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11859 }
11860 
11861 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11862     CodeGenModule &CGM, const OMPLoopDirective &S)
11863     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11864   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11865   if (!NeedToPush)
11866     return;
11867   NontemporalDeclsSet &DS =
11868       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11869   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11870     for (const Stmt *Ref : C->private_refs()) {
11871       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11872       const ValueDecl *VD;
11873       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11874         VD = DRE->getDecl();
11875       } else {
11876         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11877         assert((ME->isImplicitCXXThis() ||
11878                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11879                "Expected member of current class.");
11880         VD = ME->getMemberDecl();
11881       }
11882       DS.insert(VD);
11883     }
11884   }
11885 }
11886 
11887 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11888   if (!NeedToPush)
11889     return;
11890   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11891 }
11892 
11893 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11894     CodeGenFunction &CGF,
11895     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11896                           std::pair<Address, Address>> &LocalVars)
11897     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11898   if (!NeedToPush)
11899     return;
11900   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11901       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11902   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11903 }
11904 
11905 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11906   if (!NeedToPush)
11907     return;
11908   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11909 }
11910 
11911 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11912   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11913 
11914   return llvm::any_of(
11915       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11916       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11917 }
11918 
11919 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11920     const OMPExecutableDirective &S,
11921     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11922     const {
11923   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11924   // Vars in target/task regions must be excluded completely.
11925   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11926       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11927     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11928     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11929     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11930     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11931       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11932         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11933     }
11934   }
  // Exclude vars listed in private, firstprivate, lastprivate, reduction and
  // linear clauses.
  auto CollectScalarDeclRefs = [&NeedToCheckForLPCs](auto &&Clauses) {
    for (const auto *C : Clauses) {
      for (const Expr *Ref : C->varlists()) {
        if (!Ref->getType()->isScalarType())
          continue;
        const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
        if (!DRE)
          continue;
        NeedToCheckForLPCs.insert(DRE->getDecl());
      }
    }
  };
  CollectScalarDeclRefs(S.getClausesOfKind<OMPPrivateClause>());
  CollectScalarDeclRefs(S.getClausesOfKind<OMPFirstprivateClause>());
  CollectScalarDeclRefs(S.getClausesOfKind<OMPLastprivateClause>());
  CollectScalarDeclRefs(S.getClausesOfKind<OMPReductionClause>());
  CollectScalarDeclRefs(S.getClausesOfKind<OMPLinearClause>());
11986   for (const Decl *VD : NeedToCheckForLPCs) {
11987     for (const LastprivateConditionalData &Data :
11988          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11989       if (Data.DeclToUniqueName.count(VD) > 0) {
11990         if (!Data.Disabled)
11991           NeedToAddForLPCsAsDisabled.insert(VD);
11992         break;
11993       }
11994     }
11995   }
11996 }
11997 
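// Pushes the set of lastprivate conditional variables of the directive when
// OpenMP >= 5.0 and at least one 'lastprivate(conditional:)' clause is
// present; otherwise this RAII is a no-op.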
11998 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11999     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12000     : CGM(CGF.CGM),
12001       Action((CGM.getLangOpts().OpenMP >= 50 &&
12002               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12003                            [](const OMPLastprivateClause *C) {
12004                              return C->getKind() ==
12005                                     OMPC_LASTPRIVATE_conditional;
12006                            }))
12007                  ? ActionToDo::PushAsLastprivateConditional
12008                  : ActionToDo::DoNotPush) {
12009   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12010   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12011     return;
12012   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12013          "Expected a push action.");
12014   LastprivateConditionalData &Data =
12015       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12016   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12017     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12018       continue;
12019 
12020     for (const Expr *Ref : C->varlists()) {
12021       Data.DeclToUniqueName.insert(std::make_pair(
12022           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12023           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12024     }
12025   }
12026   Data.IVLVal = IVLVal;
12027   Data.Fn = CGF.CurFn;
12028 }
12029 
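// Private (disabling) constructor used by the disable() factory below: pushes
// an entry that marks the collected variables as disabled for lastprivate
// conditional analysis in inner regions.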
12030 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12031     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12032     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12033   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12034   if (CGM.getLangOpts().OpenMP < 50)
12035     return;
12036   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12037   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12038   if (!NeedToAddForLPCsAsDisabled.empty()) {
12039     Action = ActionToDo::DisableLastprivateConditional;
12040     LastprivateConditionalData &Data =
12041         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12042     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12043       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12044     Data.Fn = CGF.CurFn;
12045     Data.Disabled = true;
12046   }
12047 }
12048 
12049 CGOpenMPRuntime::LastprivateConditionalRAII
12050 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12051     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12052   return LastprivateConditionalRAII(CGF, S);
12053 }
12054 
12055 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12056   if (CGM.getLangOpts().OpenMP < 50)
12057     return;
12058   if (Action == ActionToDo::DisableLastprivateConditional) {
12059     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12060            "Expected list of disabled private vars.");
12061     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12062   }
12063   if (Action == ActionToDo::PushAsLastprivateConditional) {
12064     assert(
12065         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12066         "Expected list of lastprivate conditional vars.");
12067     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12068   }
12069 }
12070 
12071 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12072                                                         const VarDecl *VD) {
12073   ASTContext &C = CGM.getContext();
12074   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12075   if (I == LastprivateConditionalToTypes.end())
12076     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12077   QualType NewType;
12078   const FieldDecl *VDField;
12079   const FieldDecl *FiredField;
12080   LValue BaseLVal;
12081   auto VI = I->getSecond().find(VD);
12082   if (VI == I->getSecond().end()) {
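    // Build an implicit record holding the private value plus a 'Fired' flag
    // that records whether the value was ever updated; for an 'int' variable
    // this is roughly:
    //   struct lastprivate.conditional { int <name>; char Fired; };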
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12084     RD->startDefinition();
12085     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12086     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12087     RD->completeDefinition();
12088     NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12090     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12091     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12092   } else {
12093     NewType = std::get<0>(VI->getSecond());
12094     VDField = std::get<1>(VI->getSecond());
12095     FiredField = std::get<2>(VI->getSecond());
12096     BaseLVal = std::get<3>(VI->getSecond());
12097   }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
12100   CGF.EmitStoreOfScalar(
12101       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12102       FiredLVal);
12103   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12104 }
12105 
12106 namespace {
/// Checks whether a lastprivate conditional variable is referenced on the LHS.
12108 class LastprivateConditionalRefChecker final
12109     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12110   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12111   const Expr *FoundE = nullptr;
12112   const Decl *FoundD = nullptr;
12113   StringRef UniqueDeclName;
12114   LValue IVLVal;
12115   llvm::Function *FoundFn = nullptr;
12116   SourceLocation Loc;
12117 
12118 public:
12119   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12120     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12121          llvm::reverse(LPM)) {
12122       auto It = D.DeclToUniqueName.find(E->getDecl());
12123       if (It == D.DeclToUniqueName.end())
12124         continue;
12125       if (D.Disabled)
12126         return false;
12127       FoundE = E;
12128       FoundD = E->getDecl()->getCanonicalDecl();
12129       UniqueDeclName = It->second;
12130       IVLVal = D.IVLVal;
12131       FoundFn = D.Fn;
12132       break;
12133     }
12134     return FoundE == E;
12135   }
12136   bool VisitMemberExpr(const MemberExpr *E) {
12137     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12138       return false;
12139     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12140          llvm::reverse(LPM)) {
12141       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12142       if (It == D.DeclToUniqueName.end())
12143         continue;
12144       if (D.Disabled)
12145         return false;
12146       FoundE = E;
12147       FoundD = E->getMemberDecl()->getCanonicalDecl();
12148       UniqueDeclName = It->second;
12149       IVLVal = D.IVLVal;
12150       FoundFn = D.Fn;
12151       break;
12152     }
12153     return FoundE == E;
12154   }
12155   bool VisitStmt(const Stmt *S) {
12156     for (const Stmt *Child : S->children()) {
12157       if (!Child)
12158         continue;
12159       if (const auto *E = dyn_cast<Expr>(Child))
12160         if (!E->isGLValue())
12161           continue;
12162       if (Visit(Child))
12163         return true;
12164     }
12165     return false;
12166   }
12167   explicit LastprivateConditionalRefChecker(
12168       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12169       : LPM(LPM) {}
12170   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12171   getFoundData() const {
12172     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12173   }
12174 };
12175 } // namespace
12176 
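// Emits the update of the global tracking variables for a lastprivate
// conditional: if the current iteration is at least as late as the last
// recorded one, both the iteration counter and the value are stored. The
// update runs under a critical region unless in SIMD-only mode.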
12177 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12178                                                        LValue IVLVal,
12179                                                        StringRef UniqueDeclName,
12180                                                        LValue LVal,
12181                                                        SourceLocation Loc) {
12182   // Last updated loop counter for the lastprivate conditional var.
12183   // int<xx> last_iv = 0;
12184   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12185   llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12186       LLIVTy, getName({UniqueDeclName, "iv"}));
12187   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12188       IVLVal.getAlignment().getAsAlign());
12189   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12190 
12191   // Last value of the lastprivate conditional.
12192   // decltype(priv_a) last_a;
12193   llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12194       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12195   Last->setAlignment(LVal.getAlignment().getAsAlign());
12196   LValue LastLVal = CGF.MakeAddrLValue(
12197       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12198 
12199   // Global loop counter. Required to handle inner parallel-for regions.
12200   // iv
12201   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12202 
12203   // #pragma omp critical(a)
12204   // if (last_iv <= iv) {
12205   //   last_iv = iv;
12206   //   last_a = priv_a;
12207   // }
12208   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12209                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12210     Action.Enter(CGF);
12211     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // If last_iv <= iv, the current iteration is at least as late as the last
    // recorded update, so store the new value in the global var.
12214     llvm::Value *CmpRes;
12215     if (IVLVal.getType()->isSignedIntegerType()) {
12216       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12217     } else {
12218       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12219              "Loop iteration variable must be integer.");
12220       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12221     }
12222     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12223     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12224     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12225     // {
12226     CGF.EmitBlock(ThenBB);
12227 
12228     //   last_iv = iv;
12229     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12230 
12231     //   last_a = priv_a;
12232     switch (CGF.getEvaluationKind(LVal.getType())) {
12233     case TEK_Scalar: {
12234       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12235       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12236       break;
12237     }
12238     case TEK_Complex: {
12239       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12240       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12241       break;
12242     }
12243     case TEK_Aggregate:
12244       llvm_unreachable(
12245           "Aggregates are not supported in lastprivate conditional.");
12246     }
12247     // }
12248     CGF.EmitBranch(ExitBB);
    // There is no need to emit a line number for the unconditional branch.
12250     (void)ApplyDebugLocation::CreateEmpty(CGF);
12251     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12252   };
12253 
12254   if (CGM.getLangOpts().OpenMPSimd) {
    // In SIMD-only mode no parallel regions are emitted, so there is no need
    // for a critical region; emit the update directly.
12256     RegionCodeGenTy ThenRCG(CodeGen);
12257     ThenRCG(CGF);
12258   } else {
12259     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12260   }
12261 }
12262 
12263 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12264                                                          const Expr *LHS) {
12265   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12266     return;
12267   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12268   if (!Checker.Visit(LHS))
12269     return;
12270   const Expr *FoundE;
12271   const Decl *FoundD;
12272   StringRef UniqueDeclName;
12273   LValue IVLVal;
12274   llvm::Function *FoundFn;
12275   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12276       Checker.getFoundData();
12277   if (FoundFn != CGF.CurFn) {
12278     // Special codegen for inner parallel regions.
12279     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12280     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12281     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12282            "Lastprivate conditional is not found in outer region.");
12283     QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
12285     LValue PrivLVal = CGF.EmitLValue(FoundE);
12286     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12287         PrivLVal.getAddress(CGF),
12288         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12289         CGF.ConvertTypeForMem(StructTy));
12290     LValue BaseLVal =
12291         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12292     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12293     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12294                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12295                         FiredLVal, llvm::AtomicOrdering::Unordered,
12296                         /*IsVolatile=*/true, /*isInit=*/false);
12297     return;
12298   }
12299 
12300   // Private address of the lastprivate conditional in the current context.
12301   // priv_a
12302   LValue LVal = CGF.EmitLValue(FoundE);
12303   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12304                                    FoundE->getExprLoc());
12305 }
12306 
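// At the end of a combined construct, checks the 'Fired' flags of lastprivate
// conditional variables captured from the enclosing region and emits the
// conditional update for each one that was set in an inner region.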
12307 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12308     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12309     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12310   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12311     return;
12312   auto Range = llvm::reverse(LastprivateConditionalStack);
12313   auto It = llvm::find_if(
12314       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12315   if (It == Range.end() || It->Fn != CGF.CurFn)
12316     return;
12317   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12318   assert(LPCI != LastprivateConditionalToTypes.end() &&
12319          "Lastprivates must be registered already.");
12320   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12321   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12322   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12323   for (const auto &Pair : It->DeclToUniqueName) {
12324     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12325     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12326       continue;
12327     auto I = LPCI->getSecond().find(Pair.first);
12328     assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
12330     // bool Cmp = priv_a.Fired != 0;
12331     LValue BaseLVal = std::get<3>(I->getSecond());
12332     LValue FiredLVal =
12333         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12334     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12335     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12336     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12337     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12338     // if (Cmp) {
12339     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12340     CGF.EmitBlock(ThenBB);
12341     Address Addr = CGF.GetAddrOfLocalVar(VD);
12342     LValue LVal;
12343     if (VD->getType()->isReferenceType())
12344       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12345                                            AlignmentSource::Decl);
12346     else
12347       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12348                                 AlignmentSource::Decl);
12349     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12350                                      D.getBeginLoc());
12351     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
12353     // }
12354   }
12355 }
12356 
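// Final step of lastprivate conditional handling: if the tracking global for
// the variable exists (i.e. it was updated somewhere in the region), copy its
// value back into the private copy.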
12357 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12358     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12359     SourceLocation Loc) {
12360   if (CGF.getLangOpts().OpenMP < 50)
12361     return;
12362   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12363   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12364          "Unknown lastprivate conditional variable.");
12365   StringRef UniqueName = It->second;
12366   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region, so there is nothing to copy
  // back - exit.
12368   if (!GV)
12369     return;
12370   LValue LPLVal = CGF.MakeAddrLValue(
12371       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12372       PrivLVal.getType().getNonReferenceType());
12373   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12374   CGF.EmitStoreOfScalar(Res, PrivLVal);
12375 }
12376 
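// CGOpenMPSIMDRuntime is used in SIMD-only (-fopenmp-simd) mode, where only
// 'simd' constructs are code-generated; the runtime entry points below are
// therefore expected to be unreachable.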
12377 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12378     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12379     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12380   llvm_unreachable("Not supported in SIMD-only mode");
12381 }
12382 
12383 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12384     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12385     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12386   llvm_unreachable("Not supported in SIMD-only mode");
12387 }
12388 
12389 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12390     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12391     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12392     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12393     bool Tied, unsigned &NumberOfParts) {
12394   llvm_unreachable("Not supported in SIMD-only mode");
12395 }
12396 
12397 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12398                                            SourceLocation Loc,
12399                                            llvm::Function *OutlinedFn,
12400                                            ArrayRef<llvm::Value *> CapturedVars,
12401                                            const Expr *IfCond,
12402                                            llvm::Value *NumThreads) {
12403   llvm_unreachable("Not supported in SIMD-only mode");
12404 }
12405 
12406 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12407     CodeGenFunction &CGF, StringRef CriticalName,
12408     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12409     const Expr *Hint) {
12410   llvm_unreachable("Not supported in SIMD-only mode");
12411 }
12412 
12413 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12414                                            const RegionCodeGenTy &MasterOpGen,
12415                                            SourceLocation Loc) {
12416   llvm_unreachable("Not supported in SIMD-only mode");
12417 }
12418 
12419 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12420                                            const RegionCodeGenTy &MasterOpGen,
12421                                            SourceLocation Loc,
12422                                            const Expr *Filter) {
12423   llvm_unreachable("Not supported in SIMD-only mode");
12424 }
12425 
12426 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12427                                             SourceLocation Loc) {
12428   llvm_unreachable("Not supported in SIMD-only mode");
12429 }
12430 
12431 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12432     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12433     SourceLocation Loc) {
12434   llvm_unreachable("Not supported in SIMD-only mode");
12435 }
12436 
12437 void CGOpenMPSIMDRuntime::emitSingleRegion(
12438     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12439     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12440     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12441     ArrayRef<const Expr *> AssignmentOps) {
12442   llvm_unreachable("Not supported in SIMD-only mode");
12443 }
12444 
12445 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12446                                             const RegionCodeGenTy &OrderedOpGen,
12447                                             SourceLocation Loc,
12448                                             bool IsThreads) {
12449   llvm_unreachable("Not supported in SIMD-only mode");
12450 }
12451 
12452 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12453                                           SourceLocation Loc,
12454                                           OpenMPDirectiveKind Kind,
12455                                           bool EmitChecks,
12456                                           bool ForceSimpleCall) {
12457   llvm_unreachable("Not supported in SIMD-only mode");
12458 }
12459 
12460 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12461     CodeGenFunction &CGF, SourceLocation Loc,
12462     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12463     bool Ordered, const DispatchRTInput &DispatchValues) {
12464   llvm_unreachable("Not supported in SIMD-only mode");
12465 }
12466 
12467 void CGOpenMPSIMDRuntime::emitForStaticInit(
12468     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12469     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12470   llvm_unreachable("Not supported in SIMD-only mode");
12471 }
12472 
12473 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12474     CodeGenFunction &CGF, SourceLocation Loc,
12475     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12476   llvm_unreachable("Not supported in SIMD-only mode");
12477 }
12478 
12479 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12480                                                      SourceLocation Loc,
12481                                                      unsigned IVSize,
12482                                                      bool IVSigned) {
12483   llvm_unreachable("Not supported in SIMD-only mode");
12484 }
12485 
12486 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12487                                               SourceLocation Loc,
12488                                               OpenMPDirectiveKind DKind) {
12489   llvm_unreachable("Not supported in SIMD-only mode");
12490 }
12491 
12492 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12493                                               SourceLocation Loc,
12494                                               unsigned IVSize, bool IVSigned,
12495                                               Address IL, Address LB,
12496                                               Address UB, Address ST) {
12497   llvm_unreachable("Not supported in SIMD-only mode");
12498 }
12499 
12500 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12501                                                llvm::Value *NumThreads,
12502                                                SourceLocation Loc) {
12503   llvm_unreachable("Not supported in SIMD-only mode");
12504 }
12505 
12506 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12507                                              ProcBindKind ProcBind,
12508                                              SourceLocation Loc) {
12509   llvm_unreachable("Not supported in SIMD-only mode");
12510 }
12511 
12512 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12513                                                     const VarDecl *VD,
12514                                                     Address VDAddr,
12515                                                     SourceLocation Loc) {
12516   llvm_unreachable("Not supported in SIMD-only mode");
12517 }
12518 
12519 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12520     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12521     CodeGenFunction *CGF) {
12522   llvm_unreachable("Not supported in SIMD-only mode");
12523 }
12524 
12525 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12526     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12527   llvm_unreachable("Not supported in SIMD-only mode");
12528 }
12529 
12530 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12531                                     ArrayRef<const Expr *> Vars,
12532                                     SourceLocation Loc,
12533                                     llvm::AtomicOrdering AO) {
12534   llvm_unreachable("Not supported in SIMD-only mode");
12535 }
12536 
12537 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12538                                        const OMPExecutableDirective &D,
12539                                        llvm::Function *TaskFunction,
12540                                        QualType SharedsTy, Address Shareds,
12541                                        const Expr *IfCond,
12542                                        const OMPTaskDataTy &Data) {
12543   llvm_unreachable("Not supported in SIMD-only mode");
12544 }
12545 
12546 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12547     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12548     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12549     const Expr *IfCond, const OMPTaskDataTy &Data) {
12550   llvm_unreachable("Not supported in SIMD-only mode");
12551 }
12552 
12553 void CGOpenMPSIMDRuntime::emitReduction(
12554     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12555     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12556     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12557   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12558   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12559                                  ReductionOps, Options);
12560 }
12561 
12562 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12563     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12564     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12565   llvm_unreachable("Not supported in SIMD-only mode");
12566 }
12567 
12568 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12569                                                 SourceLocation Loc,
12570                                                 bool IsWorksharingReduction) {
12571   llvm_unreachable("Not supported in SIMD-only mode");
12572 }
12573 
12574 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12575                                                   SourceLocation Loc,
12576                                                   ReductionCodeGen &RCG,
12577                                                   unsigned N) {
12578   llvm_unreachable("Not supported in SIMD-only mode");
12579 }
12580 
12581 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12582                                                   SourceLocation Loc,
12583                                                   llvm::Value *ReductionsPtr,
12584                                                   LValue SharedLVal) {
12585   llvm_unreachable("Not supported in SIMD-only mode");
12586 }
12587 
12588 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12589                                            SourceLocation Loc,
12590                                            const OMPTaskDataTy &Data) {
12591   llvm_unreachable("Not supported in SIMD-only mode");
12592 }
12593 
12594 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12595     CodeGenFunction &CGF, SourceLocation Loc,
12596     OpenMPDirectiveKind CancelRegion) {
12597   llvm_unreachable("Not supported in SIMD-only mode");
12598 }
12599 
12600 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12601                                          SourceLocation Loc, const Expr *IfCond,
12602                                          OpenMPDirectiveKind CancelRegion) {
12603   llvm_unreachable("Not supported in SIMD-only mode");
12604 }
12605 
12606 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12607     const OMPExecutableDirective &D, StringRef ParentName,
12608     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12609     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12610   llvm_unreachable("Not supported in SIMD-only mode");
12611 }
12612 
12613 void CGOpenMPSIMDRuntime::emitTargetCall(
12614     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12615     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12616     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12617     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12618                                      const OMPLoopDirective &D)>
12619         SizeEmitter) {
12620   llvm_unreachable("Not supported in SIMD-only mode");
12621 }
12622 
12623 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12624   llvm_unreachable("Not supported in SIMD-only mode");
12625 }
12626 
12627 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12628   llvm_unreachable("Not supported in SIMD-only mode");
12629 }
12630 
12631 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12632   return false;
12633 }
12634 
12635 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12636                                         const OMPExecutableDirective &D,
12637                                         SourceLocation Loc,
12638                                         llvm::Function *OutlinedFn,
12639                                         ArrayRef<llvm::Value *> CapturedVars) {
12640   llvm_unreachable("Not supported in SIMD-only mode");
12641 }
12642 
12643 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12644                                              const Expr *NumTeams,
12645                                              const Expr *ThreadLimit,
12646                                              SourceLocation Loc) {
12647   llvm_unreachable("Not supported in SIMD-only mode");
12648 }
12649 
12650 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12651     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12652     const Expr *Device, const RegionCodeGenTy &CodeGen,
12653     CGOpenMPRuntime::TargetDataInfo &Info) {
12654   llvm_unreachable("Not supported in SIMD-only mode");
12655 }
12656 
12657 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12658     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12659     const Expr *Device) {
12660   llvm_unreachable("Not supported in SIMD-only mode");
12661 }
12662 
12663 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12664                                            const OMPLoopDirective &D,
12665                                            ArrayRef<Expr *> NumIterations) {
12666   llvm_unreachable("Not supported in SIMD-only mode");
12667 }
12668 
12669 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12670                                               const OMPDependClause *C) {
12671   llvm_unreachable("Not supported in SIMD-only mode");
12672 }
12673 
12674 const VarDecl *
12675 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12676                                         const VarDecl *NativeParam) const {
12677   llvm_unreachable("Not supported in SIMD-only mode");
12678 }
12679 
12680 Address
12681 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12682                                          const VarDecl *NativeParam,
12683                                          const VarDecl *TargetParam) const {
12684   llvm_unreachable("Not supported in SIMD-only mode");
12685 }
12686