1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/CodeGen/ConstantInitBuilder.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetOperations.h"
27 #include "llvm/Bitcode/BitcodeReader.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <cassert>
35 
36 using namespace clang;
37 using namespace CodeGen;
38 using namespace llvm::omp;
39 
40 namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Instances are installed as CodeGenFunction::CapturedStmtInfo while the
/// body of an OpenMP construct is emitted; they carry the codegen callback,
/// the directive kind and whether the region may be cancelled.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a construct with an associated captured statement
  /// \p CS (the outlined-function cases).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used for inlined
  /// regions, which reuse the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op by default;
  /// overridden by task regions (and forwarded by inlined regions).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region's directive may be exited via 'cancel'.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: matches any captured-statement info created for OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
102 
103 /// API for captured statement code generation in OpenMP constructs.
104 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
105 public:
106   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
107                              const RegionCodeGenTy &CodeGen,
108                              OpenMPDirectiveKind Kind, bool HasCancel,
109                              StringRef HelperName)
110       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
111                            HasCancel),
112         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
113     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
114   }
115 
116   /// Get a variable or parameter for storing global thread id
117   /// inside OpenMP construct.
118   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
119 
120   /// Get the name of the capture helper.
121   StringRef getHelperName() const override { return HelperName; }
122 
123   static bool classof(const CGCapturedStmtInfo *Info) {
124     return CGOpenMPRegionInfo::classof(Info) &&
125            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
126                ParallelOutlinedRegion;
127   }
128 
129 private:
130   /// A variable or parameter storing global thread id for OpenMP
131   /// constructs.
132   const VarDecl *ThreadIDVar;
133   StringRef HelperName;
134 };
135 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch machinery for 'untied' tasks.
  /// An untied task body may be split into several parts; the id of the part
  /// to resume is stored through *PartIDVar and dispatched via a switch
  /// emitted on entry to the outlined function.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch on the stored part id; unmatched values fall through to the
        // 'done' block, which simply returns from the outlined function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: record the id of the next part in
    /// *PartIDVar, run the UntiedCodeGen hook, return from the task, and
    /// register the continuation block as a new case of the entry switch.
    /// The case number reuses the current switch-case count, so cases are
    /// numbered consecutively in emission order.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Remember where to resume on the next invocation of the task entry.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts the task body was split into (the number of switch
    /// cases emitted so far).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied-task switching to the associated action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: matches only task-outlined region infos.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
224 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Wraps the previously installed CGCapturedStmtInfo (if any)
/// and forwards most queries to the enclosing OpenMP region, when there is
/// one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        // Null when the previous info is absent or not an OpenMP region.
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct. Returns null when there is no enclosing
  /// OpenMP region to take it from.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE: unlike the other accessors, this delegates through the raw old
  /// CSI (which need not be an OpenMP region info), so any enclosing
  /// captured-statement helper name is honored.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied-task switching to the enclosing region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Returns the captured-statement info that was active before this
  /// inlined region was entered (restored by InlinedOpenMPRegionRAII).
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI: matches only inlined region infos.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
307 
308 /// API for captured statement code generation in OpenMP target
309 /// constructs. For this captures, implicit parameters are used instead of the
310 /// captured fields. The name of the target region has to be unique in a given
311 /// application so it is provided by the client, because only the client has
312 /// the information to generate that.
313 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
314 public:
315   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
316                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
317       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
318                            /*HasCancel=*/false),
319         HelperName(HelperName) {}
320 
321   /// This is unused for target regions because each starts executing
322   /// with a single thread.
323   const VarDecl *getThreadIDVariable() const override { return nullptr; }
324 
325   /// Get the name of the capture helper.
326   StringRef getHelperName() const override { return HelperName; }
327 
328   static bool classof(const CGCapturedStmtInfo *Info) {
329     return CGOpenMPRegionInfo::classof(Info) &&
330            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
331   }
332 
333 private:
334   StringRef HelperName;
335 };
336 
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// statement body (used by CGOpenMPInnerExprInfo, which only privatizes
/// captured globals for expression emission).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region. No statement body is ever emitted through this info; it exists to
/// remap captured global variables to their privatized addresses while an
/// expression is emitted.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need remapping.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the variable at the capture location and
      // register its emitted address as the private copy.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  /// Forwards to the enclosing region's lookup; null when not captured there.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never matched via RTTI; this info is only used directly.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
399 
/// RAII for emitting code of OpenMP constructs.
/// On construction, installs a fresh CGOpenMPInlinedRegionInfo as the
/// function's CapturedStmtInfo and stashes the lambda/block capture state;
/// on destruction, deletes the temporary info and restores everything.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state. These are cleared in CGF for the
  // duration of the region so the inlined body does not resolve variables
  // through the enclosing lambda's/block's captures.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct. The previous CapturedStmtInfo is
    // retained inside the new info (see getOldCSI) for restoration below.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    // Note: the old CSI must be read out before the wrapper is deleted.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
436 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeration elements are named and described in accordance with the
/// code from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same bit as OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive (0x80 | OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive (0x100 | OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
465 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs with special meaning to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
491 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices used when GEPing into the generated ident_t struct; the
/// order must match the layout above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
532 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The numeric values are part of the ABI
/// contract with the OpenMP runtime and must not be changed.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule when none is specified: static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These are high-order modifier bits OR'ed into a base schedule value.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
564 
565 enum OpenMPRTLFunction {
566   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
567   /// kmpc_micro microtask, ...);
568   OMPRTL__kmpc_fork_call,
569   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
570   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
571   OMPRTL__kmpc_threadprivate_cached,
572   /// Call to void __kmpc_threadprivate_register( ident_t *,
573   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
574   OMPRTL__kmpc_threadprivate_register,
575   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
576   OMPRTL__kmpc_global_thread_num,
577   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_critical,
580   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
581   // global_tid, kmp_critical_name *crit, uintptr_t hint);
582   OMPRTL__kmpc_critical_with_hint,
583   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
584   // kmp_critical_name *crit);
585   OMPRTL__kmpc_end_critical,
586   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
587   // global_tid);
588   OMPRTL__kmpc_cancel_barrier,
589   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
590   OMPRTL__kmpc_barrier,
591   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
592   OMPRTL__kmpc_for_static_fini,
593   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
594   // global_tid);
595   OMPRTL__kmpc_serialized_parallel,
596   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
597   // global_tid);
598   OMPRTL__kmpc_end_serialized_parallel,
599   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
600   // kmp_int32 num_threads);
601   OMPRTL__kmpc_push_num_threads,
602   // Call to void __kmpc_flush(ident_t *loc);
603   OMPRTL__kmpc_flush,
604   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
605   OMPRTL__kmpc_master,
606   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
607   OMPRTL__kmpc_end_master,
608   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
609   // int end_part);
610   OMPRTL__kmpc_omp_taskyield,
611   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
612   OMPRTL__kmpc_single,
613   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
614   OMPRTL__kmpc_end_single,
615   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
616   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
617   // kmp_routine_entry_t *task_entry);
618   OMPRTL__kmpc_omp_task_alloc,
619   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
620   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
621   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
622   // kmp_int64 device_id);
623   OMPRTL__kmpc_omp_target_task_alloc,
624   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
625   // new_task);
626   OMPRTL__kmpc_omp_task,
627   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
628   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
629   // kmp_int32 didit);
630   OMPRTL__kmpc_copyprivate,
631   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
632   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
633   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
634   OMPRTL__kmpc_reduce,
635   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
636   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
637   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
638   // *lck);
639   OMPRTL__kmpc_reduce_nowait,
640   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
641   // kmp_critical_name *lck);
642   OMPRTL__kmpc_end_reduce,
643   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
644   // kmp_critical_name *lck);
645   OMPRTL__kmpc_end_reduce_nowait,
646   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
647   // kmp_task_t * new_task);
648   OMPRTL__kmpc_omp_task_begin_if0,
649   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
650   // kmp_task_t * new_task);
651   OMPRTL__kmpc_omp_task_complete_if0,
652   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
653   OMPRTL__kmpc_ordered,
654   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
655   OMPRTL__kmpc_end_ordered,
656   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
657   // global_tid);
658   OMPRTL__kmpc_omp_taskwait,
659   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
660   OMPRTL__kmpc_taskgroup,
661   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
662   OMPRTL__kmpc_end_taskgroup,
663   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
664   // int proc_bind);
665   OMPRTL__kmpc_push_proc_bind,
666   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
667   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
668   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
669   OMPRTL__kmpc_omp_task_with_deps,
670   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
671   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
672   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
673   OMPRTL__kmpc_omp_wait_deps,
674   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
675   // global_tid, kmp_int32 cncl_kind);
676   OMPRTL__kmpc_cancellationpoint,
677   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
678   // kmp_int32 cncl_kind);
679   OMPRTL__kmpc_cancel,
680   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
681   // kmp_int32 num_teams, kmp_int32 thread_limit);
682   OMPRTL__kmpc_push_num_teams,
683   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
684   // microtask, ...);
685   OMPRTL__kmpc_fork_teams,
686   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
687   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
688   // sched, kmp_uint64 grainsize, void *task_dup);
689   OMPRTL__kmpc_taskloop,
690   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
691   // num_dims, struct kmp_dim *dims);
692   OMPRTL__kmpc_doacross_init,
693   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
694   OMPRTL__kmpc_doacross_fini,
695   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
696   // *vec);
697   OMPRTL__kmpc_doacross_post,
698   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
699   // *vec);
700   OMPRTL__kmpc_doacross_wait,
701   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
702   // *data);
703   OMPRTL__kmpc_task_reduction_init,
704   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
705   // *d);
706   OMPRTL__kmpc_task_reduction_get_th_data,
707   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
708   OMPRTL__kmpc_alloc,
709   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
710   OMPRTL__kmpc_free,
711 
712   //
713   // Offloading related calls
714   //
715   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
716   // size);
717   OMPRTL__kmpc_push_target_tripcount,
718   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
719   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
720   // *arg_types);
721   OMPRTL__tgt_target,
722   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
723   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
724   // *arg_types);
725   OMPRTL__tgt_target_nowait,
726   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
727   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
728   // *arg_types, int32_t num_teams, int32_t thread_limit);
729   OMPRTL__tgt_target_teams,
730   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
731   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
732   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
733   OMPRTL__tgt_target_teams_nowait,
734   // Call to void __tgt_register_requires(int64_t flags);
735   OMPRTL__tgt_register_requires,
736   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
737   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
738   OMPRTL__tgt_target_data_begin,
739   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
740   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
741   // *arg_types);
742   OMPRTL__tgt_target_data_begin_nowait,
743   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
744   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
745   OMPRTL__tgt_target_data_end,
746   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
747   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
748   // *arg_types);
749   OMPRTL__tgt_target_data_end_nowait,
750   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
751   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
752   OMPRTL__tgt_target_data_update,
753   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
754   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
755   // *arg_types);
756   OMPRTL__tgt_target_data_update_nowait,
757   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
758   OMPRTL__tgt_mapper_num_components,
759   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
760   // *base, void *begin, int64_t size, int64_t type);
761   OMPRTL__tgt_push_mapper_component,
762 };
763 
764 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
765 /// region.
766 class CleanupTy final : public EHScopeStack::Cleanup {
767   PrePostActionTy *Action;
768 
769 public:
770   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
771   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
772     if (!CGF.HaveInsertPoint())
773       return;
774     Action->Exit(CGF);
775   }
776 };
777 
778 } // anonymous namespace
779 
780 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
781   CodeGenFunction::RunCleanupsScope Scope(CGF);
782   if (PrePostAction) {
783     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
784     Callback(CodeGen, CGF, *PrePostAction);
785   } else {
786     PrePostActionTy Action;
787     Callback(CodeGen, CGF, Action);
788   }
789 }
790 
791 /// Check if the combiner is a call to UDR combiner and if it is so return the
792 /// UDR decl used for reduction.
793 static const OMPDeclareReductionDecl *
794 getReductionInit(const Expr *ReductionOp) {
795   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
796     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
797       if (const auto *DRE =
798               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
799         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
800           return DRD;
801   return nullptr;
802 }
803 
/// Emit initialization of a private reduction copy located at \p Private.
/// If the user-defined reduction \p DRD declares an explicit initializer,
/// emit a call to it (with its two variables redirected to \p Private and
/// \p Original); otherwise initialize the copy from a zero-initialized
/// constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Fetch the already-emitted (combiner, initializer) pair for this UDR.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // InitOp is a call through an opaque callee; dig out the two address-of
    // arguments so their underlying decls can be privatized.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the call's variables to the given private/original addresses.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee, then emit
    // the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private constant global holding
    // the null value of Ty and copy it into the private location.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the null value in the form matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Wrap the loaded value in an opaque expr so EmitAnyExprToMem can store
    // it into the private copy.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
855 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only needed for user-defined reductions, where it
  // supplies the original (omp_orig) element.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope any cleanups required by the per-element initializer so they run
    // before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest.element" although this is
    // the *source* pointer — looks like a copy-pasted name; cosmetic only.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
944 
/// Emit the lvalue of the shared (original) variable for a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
948 
949 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
950                                             const Expr *E) {
951   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
952     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
953   return LValue();
954 }
955 
/// Emit element-wise initialization of the N-th private reduction copy when
/// its type is an array.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one is declared, or when the private decl
  // carries no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
972 
973 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
974                                    ArrayRef<const Expr *> Privates,
975                                    ArrayRef<const Expr *> ReductionOps) {
976   ClausesData.reserve(Shareds.size());
977   SharedAddresses.reserve(Shareds.size());
978   Sizes.reserve(Shareds.size());
979   BaseDecls.reserve(Shareds.size());
980   auto IPriv = Privates.begin();
981   auto IRed = ReductionOps.begin();
982   for (const Expr *Ref : Shareds) {
983     ClausesData.emplace_back(Ref, *IPriv, *IRed);
984     std::advance(IPriv, 1);
985     std::advance(IRed, 1);
986   }
987 }
988 
/// Emit and record the shared lvalue (and, for array sections, the
/// upper-bound lvalue) of the N-th reduction item. Items must be emitted in
/// order — hence the assertion on the vector size.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
996 
/// Compute and record the size (in chars and, for VLAs, in elements) of the
/// N-th reduction item, then emit its variably-modified type if needed.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: size is known from the type, no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1 using the lvalues recorded by
    // emitSharedLValue / emitSharedLValueUB.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Non-section VLA: derive the element count from the byte size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA size expression so that
  // emitting the variably-modified type below picks it up.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1034 
/// Variant of emitAggregateType used when the caller already has the element
/// count \p Size: binds it to the VLA size expression of the N-th private
/// type and emits that type. No-op (with a sanity assert) for constant-sized
/// items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1053 
/// Emit initialization of the N-th private reduction copy at \p PrivateAddr
/// from the shared variable \p SharedLVal. Dispatches between array
/// initialization, a user-defined reduction initializer, and the private
/// declaration's own non-trivial initializer. \p DefaultInit runs first for
/// the scalar case; returning true means it already handled the init.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1084 
1085 bool ReductionCodeGen::needCleanups(unsigned N) {
1086   const auto *PrivateVD =
1087       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1088   QualType PrivateType = PrivateVD->getType();
1089   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1090   return DTorKind != QualType::DK_none;
1091 }
1092 
1093 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1094                                     Address PrivateAddr) {
1095   const auto *PrivateVD =
1096       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1097   QualType PrivateType = PrivateVD->getType();
1098   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1099   if (needCleanups(N)) {
1100     PrivateAddr = CGF.Builder.CreateElementBitCast(
1101         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1102     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1103   }
1104 }
1105 
/// Follow pointer/reference indirections starting at \p BaseLV (typed
/// \p BaseTy) until the pointee type matches \p ElTy, then return an lvalue
/// for the final address cast to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load through it as a reference lvalue.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1125 
/// Rebuild the indirection structure of \p BaseTy (down to \p ElTy) around
/// the adjusted pointer \p Addr: one memory temporary is created per
/// pointer/reference level and each is chained to the next, with \p Addr
/// stored at the innermost level; the outermost temporary is returned. If
/// there are no indirections, \p Addr is returned directly with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Allocate a temporary for this level and link it into its parent.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address at the innermost level and hand back the
    // outermost temporary.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1153 
1154 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1155   const VarDecl *OrigVD = nullptr;
1156   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1157     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1158     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1159       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1160     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1161       Base = TempASE->getBase()->IgnoreParenImpCasts();
1162     DE = cast<DeclRefExpr>(Base);
1163     OrigVD = cast<VarDecl>(DE->getDecl());
1164   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1165     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1166     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1167       Base = TempASE->getBase()->IgnoreParenImpCasts();
1168     DE = cast<DeclRefExpr>(Base);
1169     OrigVD = cast<VarDecl>(DE->getDecl());
1170   }
1171   return OrigVD;
1172 }
1173 
/// For array-section/subscript reduction items, shift \p PrivateAddr by the
/// distance between the original variable's base and the section start, and
/// rewrap the result in the original indirection structure; other items are
/// returned unchanged. The underlying base declaration is recorded either
/// way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Drill through pointers/references down to the element type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Signed distance (in elements) between the variable base and the
    // section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Rebuild the original pointer/reference chain around the adjusted
    // private pointer.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1199 
1200 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1201   const OMPDeclareReductionDecl *DRD =
1202       getReductionInit(ClausesData[N].ReductionOp);
1203   return DRD && DRD->getInitializer();
1204 }
1205 
/// The thread-id variable is captured as a pointer; load through it to form
/// an lvalue for the pointed-to thread id.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1211 
/// Emit the region body inside a terminate scope: exceptions must not escape
/// a structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1224 
/// In task outlined regions the thread-id variable holds the value directly
/// (unlike the pointer capture in the base class), so the lvalue is simply
/// the local variable's own storage.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1231 
1232 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1233                                        QualType FieldTy) {
1234   auto *Field = FieldDecl::Create(
1235       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1236       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1237       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1238   Field->setAccess(AS_public);
1239   DC->addDecl(Field);
1240   return Field;
1241 }
1242 
/// Construct the OpenMP runtime helper: builds the ident_t record used for
/// source-location arguments to runtime entry points, the critical-name
/// buffer type, and loads any offload entry metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // NOTE(review): the field layout below presumably mirrors the runtime's own
  // ident_t (kmp.h) — verify against the runtime headers before changing it.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Load previously recorded offload entry metadata, if any.
  loadOffloadInfoMetadata();
}
1268 
/// Try to emit the 'declare variant' function \p NewGD as an alias carrying
/// \p OldGD's name (or taking over \p OrigAddr's name and uses). Returns true
/// on success; false when the aliasee is still only a declaration, so the
/// attempt must be retried later.
bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  // Only a defined aliasee can be aliased.
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // Create a reference to the named value.  This ensures that it is emitted
    // if a deferred decl.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      // Take over the original declaration's name and uses, then drop it.
      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}
1314 
/// Finalization hook: drop unreferenced non-target declarations kept only for
/// debug info and flush any deferred 'declare variant' aliases.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep anything that is defined or still referenced.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
  // Emit aliases for the deferred aliasees.
  for (const auto &Pair : DeferredVariantFunction) {
    StringRef MangledName = CGM.getMangledName(Pair.second.second);
    llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
    // If not able to emit alias, just emit original declaration.
    (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
                                /*IsForDefinition=*/false);
  }
}
1337 
1338 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1339   SmallString<128> Buffer;
1340   llvm::raw_svector_ostream OS(Buffer);
1341   StringRef Sep = FirstSeparator;
1342   for (StringRef Part : Parts) {
1343     OS << Sep << Part;
1344     Sep = Separator;
1345   }
1346   return OS.str();
1347 }
1348 
/// Emit the helper function for a declare-reduction combiner or initializer:
/// an internal-linkage function taking two restrict-qualified Ty* parameters,
/// with the construct's variables \p Out and \p In mapped onto the
/// dereferenced parameters inside the body.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  // NOTE(review): the args are pushed Out first, then In — the prototype
  // comment above appears reversed relative to the code below; confirm.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // The helpers are small; encourage inlining in optimized builds.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, run the Out variable's own (non-trivial) initializer
  // before any explicit initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1405 
/// Emit (once) the combiner and, if present, the initializer functions for
/// the user-defined reduction \p D and cache the pair in UDRMap. When called
/// from within a function, the UDR is also recorded against that function in
/// FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted — nothing to do.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the expression through; otherwise the
    // priv variable's own initializer is used inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1431 
1432 std::pair<llvm::Function *, llvm::Function *>
1433 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1434   auto I = UDRMap.find(D);
1435   if (I != UDRMap.end())
1436     return I->second;
1437   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1438   return UDRMap.lookup(D);
1439 }
1440 
1441 namespace {
1442 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1443 // Builder if one is present.
1444 struct PushAndPopStackRAII {
1445   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1446                       bool HasCancel)
1447       : OMPBuilder(OMPBuilder) {
1448     if (!OMPBuilder)
1449       return;
1450 
1451     // The following callback is the crucial part of clangs cleanup process.
1452     //
1453     // NOTE:
1454     // Once the OpenMPIRBuilder is used to create parallel regions (and
1455     // similar), the cancellation destination (Dest below) is determined via
1456     // IP. That means if we have variables to finalize we split the block at IP,
1457     // use the new block (=BB) as destination to build a JumpDest (via
1458     // getJumpDestInCurrentScope(BB)) which then is fed to
1459     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1460     // to push & pop an FinalizationInfo object.
1461     // The FiniCB will still be needed but at the point where the
1462     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1463     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1464       assert(IP.getBlock()->end() == IP.getPoint() &&
1465              "Clang CG should cause non-terminated block!");
1466       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1467       CGF.Builder.restoreIP(IP);
1468       CodeGenFunction::JumpDest Dest =
1469           CGF.getOMPCancelDestination(OMPD_parallel);
1470       CGF.EmitBranchThroughCleanup(Dest);
1471     };
1472 
1473     // TODO: Remove this once we emit parallel regions through the
1474     //       OpenMPIRBuilder as it can do this setup internally.
1475     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1476         {FiniCB, OMPD_parallel, HasCancel});
1477     OMPBuilder->pushFinalizationCB(std::move(FI));
1478   }
1479   ~PushAndPopStackRAII() {
1480     if (OMPBuilder)
1481       OMPBuilder->popFinalizationCB();
1482   }
1483   llvm::OpenMPIRBuilder *OMPBuilder;
1484 };
1485 } // namespace
1486 
1487 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1488     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1489     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1490     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1491   assert(ThreadIDVar->getType()->isPointerType() &&
1492          "thread id variable must be of type kmp_int32 *");
1493   CodeGenFunction CGF(CGM, true);
1494   bool HasCancel = false;
1495   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1496     HasCancel = OPD->hasCancel();
1497   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1498     HasCancel = OPSD->hasCancel();
1499   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1500     HasCancel = OPFD->hasCancel();
1501   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1502     HasCancel = OPFD->hasCancel();
1503   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1504     HasCancel = OPFD->hasCancel();
1505   else if (const auto *OPFD =
1506                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1507     HasCancel = OPFD->hasCancel();
1508   else if (const auto *OPFD =
1509                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1510     HasCancel = OPFD->hasCancel();
1511 
1512   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1513   //       parallel region to make cancellation barriers work properly.
1514   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1515   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1516   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1517                                     HasCancel, OutlinedHelperName);
1518   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1519   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1520 }
1521 
1522 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1523     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1524     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1525   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1526   return emitParallelOrTeamsOutlinedFunction(
1527       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1528 }
1529 
1530 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1531     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1532     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1533   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1534   return emitParallelOrTeamsOutlinedFunction(
1535       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1536 }
1537 
1538 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1539     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1540     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1541     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1542     bool Tied, unsigned &NumberOfParts) {
1543   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1544                                               PrePostActionTy &) {
1545     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1546     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1547     llvm::Value *TaskArgs[] = {
1548         UpLoc, ThreadID,
1549         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1550                                     TaskTVar->getType()->castAs<PointerType>())
1551             .getPointer(CGF)};
1552     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1553   };
1554   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1555                                                             UntiedCodeGen);
1556   CodeGen.setAction(Action);
1557   assert(!ThreadIDVar->getType()->isPointerType() &&
1558          "thread id variable must be of type kmp_int32 for tasks");
1559   const OpenMPDirectiveKind Region =
1560       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1561                                                       : OMPD_task;
1562   const CapturedStmt *CS = D.getCapturedStmt(Region);
1563   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1564   CodeGenFunction CGF(CGM, true);
1565   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1566                                         InnermostKind,
1567                                         TD ? TD->hasCancel() : false, Action);
1568   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1569   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1570   if (!Tied)
1571     NumberOfParts = Action.getNumberOfParts();
1572   return Res;
1573 }
1574 
1575 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1576                              const RecordDecl *RD, const CGRecordLayout &RL,
1577                              ArrayRef<llvm::Constant *> Data) {
1578   llvm::StructType *StructTy = RL.getLLVMType();
1579   unsigned PrevIdx = 0;
1580   ConstantInitBuilder CIBuilder(CGM);
1581   auto DI = Data.begin();
1582   for (const FieldDecl *FD : RD->fields()) {
1583     unsigned Idx = RL.getLLVMFieldNo(FD);
1584     // Fill the alignment.
1585     for (unsigned I = PrevIdx; I < Idx; ++I)
1586       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1587     PrevIdx = Idx + 1;
1588     Fields.add(*DI);
1589     ++DI;
1590   }
1591 }
1592 
1593 template <class... As>
1594 static llvm::GlobalVariable *
1595 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1596                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1597                    As &&... Args) {
1598   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1599   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1600   ConstantInitBuilder CIBuilder(CGM);
1601   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1602   buildStructValue(Fields, CGM, RD, RL, Data);
1603   return Fields.finishAndCreateGlobal(
1604       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1605       std::forward<As>(Args)...);
1606 }
1607 
1608 template <typename T>
1609 static void
1610 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1611                                          ArrayRef<llvm::Constant *> Data,
1612                                          T &Parent) {
1613   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1614   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1615   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1616   buildStructValue(Fields, CGM, RD, RL, Data);
1617   Fields.finishAndAddTo(Parent);
1618 }
1619 
/// Return (creating and caching on first use) the address of a default
/// ident_t global for the given \p Flags. One global is emitted per distinct
/// (Flags, Reserved2Flags) pair.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Constant field values for the default ident_t: null reserved fields,
    // the requested flags, the target-specific reserved_2 flags, and the
    // default psource string.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // Only the contents matter, not the address, so identical globals may be
    // merged by the linker/optimizer.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1652 
1653 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1654                                              bool AtCurrentPoint) {
1655   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1656   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1657 
1658   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1659   if (AtCurrentPoint) {
1660     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1661         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1662   } else {
1663     Elem.second.ServiceInsertPt =
1664         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1665     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1666   }
1667 }
1668 
1669 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1670   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1671   if (Elem.second.ServiceInsertPt) {
1672     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1673     Elem.second.ServiceInsertPt = nullptr;
1674     Ptr->eraseFromParent();
1675   }
1676 }
1677 
/// Return an ident_t* describing \p Loc for a runtime-library call. Without
/// debug info (or for an invalid location) a shared default global is
/// returned; otherwise a per-function ".kmpc_loc.addr" temporary is emitted
/// once and its psource field is updated with the ";file;func;line;col;;"
/// string for this particular call site.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default location at the service
    // insert point, so the copy dominates all later uses in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached per raw source location across functions.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1738 
/// Return the OpenMP global thread id for the current function: reuse the
/// cached value if present, load it from the outlined region's thread-id
/// argument when that is safe, and otherwise emit (and cache) a call to
/// __kmpc_global_thread_num at the service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Emit the load only when C++ exceptions cannot bypass it: no landing
      // pad needed, or the address/current block is the entry block, or the
      // address is defined in the block we are emitting into.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point so the result dominates all
  // uses within the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1795 
1796 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1797   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1798   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1799     clearLocThreadIdInsertPt(CGF);
1800     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1801   }
1802   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1803     for(auto *D : FunctionUDRMap[CGF.CurFn])
1804       UDRMap.erase(D);
1805     FunctionUDRMap.erase(CGF.CurFn);
1806   }
1807   auto I = FunctionUDMMap.find(CGF.CurFn);
1808   if (I != FunctionUDMMap.end()) {
1809     for(auto *D : I->second)
1810       UDMMap.erase(D);
1811     FunctionUDMMap.erase(I);
1812   }
1813 }
1814 
1815 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1816   return IdentTy->getPointerTo();
1817 }
1818 
1819 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1820   if (!Kmpc_MicroTy) {
1821     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1822     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1823                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1824     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1825   }
1826   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1827 }
1828 
1829 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1830   llvm::FunctionCallee RTLFn = nullptr;
1831   switch (static_cast<OpenMPRTLFunction>(Function)) {
1832   case OMPRTL__kmpc_fork_call: {
1833     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1834     // microtask, ...);
1835     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1836                                 getKmpc_MicroPointerTy()};
1837     auto *FnTy =
1838         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1839     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1840     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1841       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1842         llvm::LLVMContext &Ctx = F->getContext();
1843         llvm::MDBuilder MDB(Ctx);
1844         // Annotate the callback behavior of the __kmpc_fork_call:
1845         //  - The callback callee is argument number 2 (microtask).
1846         //  - The first two arguments of the callback callee are unknown (-1).
1847         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1848         //    callback callee.
1849         F->addMetadata(
1850             llvm::LLVMContext::MD_callback,
1851             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1852                                         2, {-1, -1},
1853                                         /* VarArgsArePassed */ true)}));
1854       }
1855     }
1856     break;
1857   }
1858   case OMPRTL__kmpc_global_thread_num: {
1859     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1860     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1861     auto *FnTy =
1862         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1863     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1864     break;
1865   }
1866   case OMPRTL__kmpc_threadprivate_cached: {
1867     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1868     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1869     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1870                                 CGM.VoidPtrTy, CGM.SizeTy,
1871                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1872     auto *FnTy =
1873         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1874     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1875     break;
1876   }
1877   case OMPRTL__kmpc_critical: {
1878     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1879     // kmp_critical_name *crit);
1880     llvm::Type *TypeParams[] = {
1881         getIdentTyPointerTy(), CGM.Int32Ty,
1882         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1883     auto *FnTy =
1884         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1885     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1886     break;
1887   }
1888   case OMPRTL__kmpc_critical_with_hint: {
1889     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1890     // kmp_critical_name *crit, uintptr_t hint);
1891     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1892                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1893                                 CGM.IntPtrTy};
1894     auto *FnTy =
1895         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1896     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1897     break;
1898   }
1899   case OMPRTL__kmpc_threadprivate_register: {
1900     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1901     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1902     // typedef void *(*kmpc_ctor)(void *);
1903     auto *KmpcCtorTy =
1904         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1905                                 /*isVarArg*/ false)->getPointerTo();
1906     // typedef void *(*kmpc_cctor)(void *, void *);
1907     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1908     auto *KmpcCopyCtorTy =
1909         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1910                                 /*isVarArg*/ false)
1911             ->getPointerTo();
1912     // typedef void (*kmpc_dtor)(void *);
1913     auto *KmpcDtorTy =
1914         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1915             ->getPointerTo();
1916     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1917                               KmpcCopyCtorTy, KmpcDtorTy};
1918     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1919                                         /*isVarArg*/ false);
1920     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1921     break;
1922   }
1923   case OMPRTL__kmpc_end_critical: {
1924     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1925     // kmp_critical_name *crit);
1926     llvm::Type *TypeParams[] = {
1927         getIdentTyPointerTy(), CGM.Int32Ty,
1928         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1929     auto *FnTy =
1930         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1931     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1932     break;
1933   }
1934   case OMPRTL__kmpc_cancel_barrier: {
1935     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1936     // global_tid);
1937     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1938     auto *FnTy =
1939         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1940     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1941     break;
1942   }
1943   case OMPRTL__kmpc_barrier: {
1944     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1945     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1946     auto *FnTy =
1947         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1948     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1949     break;
1950   }
1951   case OMPRTL__kmpc_for_static_fini: {
1952     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1953     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1954     auto *FnTy =
1955         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1956     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1957     break;
1958   }
1959   case OMPRTL__kmpc_push_num_threads: {
1960     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1961     // kmp_int32 num_threads)
1962     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1963                                 CGM.Int32Ty};
1964     auto *FnTy =
1965         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1966     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1967     break;
1968   }
1969   case OMPRTL__kmpc_serialized_parallel: {
1970     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1971     // global_tid);
1972     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1973     auto *FnTy =
1974         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1975     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1976     break;
1977   }
1978   case OMPRTL__kmpc_end_serialized_parallel: {
1979     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1980     // global_tid);
1981     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1982     auto *FnTy =
1983         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1984     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1985     break;
1986   }
1987   case OMPRTL__kmpc_flush: {
1988     // Build void __kmpc_flush(ident_t *loc);
1989     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1990     auto *FnTy =
1991         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1992     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1993     break;
1994   }
1995   case OMPRTL__kmpc_master: {
1996     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1997     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1998     auto *FnTy =
1999         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2000     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2001     break;
2002   }
2003   case OMPRTL__kmpc_end_master: {
2004     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2005     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2006     auto *FnTy =
2007         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2008     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2009     break;
2010   }
2011   case OMPRTL__kmpc_omp_taskyield: {
2012     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2013     // int end_part);
2014     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2015     auto *FnTy =
2016         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2017     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2018     break;
2019   }
2020   case OMPRTL__kmpc_single: {
2021     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2022     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2023     auto *FnTy =
2024         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2025     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2026     break;
2027   }
2028   case OMPRTL__kmpc_end_single: {
2029     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2030     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2031     auto *FnTy =
2032         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2033     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2034     break;
2035   }
2036   case OMPRTL__kmpc_omp_task_alloc: {
2037     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2038     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2039     // kmp_routine_entry_t *task_entry);
2040     assert(KmpRoutineEntryPtrTy != nullptr &&
2041            "Type kmp_routine_entry_t must be created.");
2042     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2043                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2044     // Return void * and then cast to particular kmp_task_t type.
2045     auto *FnTy =
2046         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2047     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2048     break;
2049   }
2050   case OMPRTL__kmpc_omp_target_task_alloc: {
2051     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2052     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2053     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2054     assert(KmpRoutineEntryPtrTy != nullptr &&
2055            "Type kmp_routine_entry_t must be created.");
2056     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2057                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2058                                 CGM.Int64Ty};
2059     // Return void * and then cast to particular kmp_task_t type.
2060     auto *FnTy =
2061         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2062     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2063     break;
2064   }
2065   case OMPRTL__kmpc_omp_task: {
2066     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2067     // *new_task);
2068     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2069                                 CGM.VoidPtrTy};
2070     auto *FnTy =
2071         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2072     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2073     break;
2074   }
2075   case OMPRTL__kmpc_copyprivate: {
2076     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2077     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2078     // kmp_int32 didit);
2079     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2080     auto *CpyFnTy =
2081         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2082     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2083                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2084                                 CGM.Int32Ty};
2085     auto *FnTy =
2086         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2087     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2088     break;
2089   }
2090   case OMPRTL__kmpc_reduce: {
2091     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2092     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2093     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2094     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2095     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2096                                                /*isVarArg=*/false);
2097     llvm::Type *TypeParams[] = {
2098         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2099         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2100         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2101     auto *FnTy =
2102         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2103     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2104     break;
2105   }
2106   case OMPRTL__kmpc_reduce_nowait: {
2107     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2108     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2109     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2110     // *lck);
2111     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2112     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2113                                                /*isVarArg=*/false);
2114     llvm::Type *TypeParams[] = {
2115         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2116         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2117         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2118     auto *FnTy =
2119         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2120     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2121     break;
2122   }
2123   case OMPRTL__kmpc_end_reduce: {
2124     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2125     // kmp_critical_name *lck);
2126     llvm::Type *TypeParams[] = {
2127         getIdentTyPointerTy(), CGM.Int32Ty,
2128         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2129     auto *FnTy =
2130         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2131     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2132     break;
2133   }
2134   case OMPRTL__kmpc_end_reduce_nowait: {
2135     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2136     // kmp_critical_name *lck);
2137     llvm::Type *TypeParams[] = {
2138         getIdentTyPointerTy(), CGM.Int32Ty,
2139         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2140     auto *FnTy =
2141         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2142     RTLFn =
2143         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2144     break;
2145   }
2146   case OMPRTL__kmpc_omp_task_begin_if0: {
2147     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2148     // *new_task);
2149     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150                                 CGM.VoidPtrTy};
2151     auto *FnTy =
2152         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2153     RTLFn =
2154         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2155     break;
2156   }
2157   case OMPRTL__kmpc_omp_task_complete_if0: {
2158     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2159     // *new_task);
2160     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2161                                 CGM.VoidPtrTy};
2162     auto *FnTy =
2163         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2164     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2165                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2166     break;
2167   }
2168   case OMPRTL__kmpc_ordered: {
2169     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2170     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2171     auto *FnTy =
2172         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2173     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2174     break;
2175   }
2176   case OMPRTL__kmpc_end_ordered: {
2177     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2178     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2179     auto *FnTy =
2180         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2181     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2182     break;
2183   }
2184   case OMPRTL__kmpc_omp_taskwait: {
2185     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2186     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2187     auto *FnTy =
2188         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2189     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2190     break;
2191   }
2192   case OMPRTL__kmpc_taskgroup: {
2193     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2194     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2195     auto *FnTy =
2196         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2197     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2198     break;
2199   }
2200   case OMPRTL__kmpc_end_taskgroup: {
2201     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2202     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2203     auto *FnTy =
2204         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2205     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2206     break;
2207   }
2208   case OMPRTL__kmpc_push_proc_bind: {
2209     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2210     // int proc_bind)
2211     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2212     auto *FnTy =
2213         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2214     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2215     break;
2216   }
2217   case OMPRTL__kmpc_omp_task_with_deps: {
2218     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2219     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2220     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2221     llvm::Type *TypeParams[] = {
2222         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2223         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2224     auto *FnTy =
2225         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2226     RTLFn =
2227         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2228     break;
2229   }
2230   case OMPRTL__kmpc_omp_wait_deps: {
2231     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2232     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2233     // kmp_depend_info_t *noalias_dep_list);
2234     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2235                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2236                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2237     auto *FnTy =
2238         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2239     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2240     break;
2241   }
2242   case OMPRTL__kmpc_cancellationpoint: {
2243     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2244     // global_tid, kmp_int32 cncl_kind)
2245     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2246     auto *FnTy =
2247         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2248     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2249     break;
2250   }
2251   case OMPRTL__kmpc_cancel: {
2252     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2253     // kmp_int32 cncl_kind)
2254     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2255     auto *FnTy =
2256         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2257     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2258     break;
2259   }
2260   case OMPRTL__kmpc_push_num_teams: {
2261     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2262     // kmp_int32 num_teams, kmp_int32 num_threads)
2263     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2264         CGM.Int32Ty};
2265     auto *FnTy =
2266         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2267     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2268     break;
2269   }
2270   case OMPRTL__kmpc_fork_teams: {
2271     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2272     // microtask, ...);
2273     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2274                                 getKmpc_MicroPointerTy()};
2275     auto *FnTy =
2276         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2277     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2278     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2279       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2280         llvm::LLVMContext &Ctx = F->getContext();
2281         llvm::MDBuilder MDB(Ctx);
2282         // Annotate the callback behavior of the __kmpc_fork_teams:
2283         //  - The callback callee is argument number 2 (microtask).
2284         //  - The first two arguments of the callback callee are unknown (-1).
2285         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2286         //    callback callee.
2287         F->addMetadata(
2288             llvm::LLVMContext::MD_callback,
2289             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2290                                         2, {-1, -1},
2291                                         /* VarArgsArePassed */ true)}));
2292       }
2293     }
2294     break;
2295   }
2296   case OMPRTL__kmpc_taskloop: {
2297     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2298     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2299     // sched, kmp_uint64 grainsize, void *task_dup);
2300     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2301                                 CGM.IntTy,
2302                                 CGM.VoidPtrTy,
2303                                 CGM.IntTy,
2304                                 CGM.Int64Ty->getPointerTo(),
2305                                 CGM.Int64Ty->getPointerTo(),
2306                                 CGM.Int64Ty,
2307                                 CGM.IntTy,
2308                                 CGM.IntTy,
2309                                 CGM.Int64Ty,
2310                                 CGM.VoidPtrTy};
2311     auto *FnTy =
2312         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2313     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2314     break;
2315   }
2316   case OMPRTL__kmpc_doacross_init: {
2317     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2318     // num_dims, struct kmp_dim *dims);
2319     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2320                                 CGM.Int32Ty,
2321                                 CGM.Int32Ty,
2322                                 CGM.VoidPtrTy};
2323     auto *FnTy =
2324         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2325     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2326     break;
2327   }
2328   case OMPRTL__kmpc_doacross_fini: {
2329     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2330     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2331     auto *FnTy =
2332         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2333     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2334     break;
2335   }
2336   case OMPRTL__kmpc_doacross_post: {
2337     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2338     // *vec);
2339     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2340                                 CGM.Int64Ty->getPointerTo()};
2341     auto *FnTy =
2342         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2343     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2344     break;
2345   }
2346   case OMPRTL__kmpc_doacross_wait: {
2347     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2348     // *vec);
2349     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2350                                 CGM.Int64Ty->getPointerTo()};
2351     auto *FnTy =
2352         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2353     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2354     break;
2355   }
2356   case OMPRTL__kmpc_task_reduction_init: {
2357     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2358     // *data);
2359     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2360     auto *FnTy =
2361         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2362     RTLFn =
2363         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2364     break;
2365   }
2366   case OMPRTL__kmpc_task_reduction_get_th_data: {
2367     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2368     // *d);
2369     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2370     auto *FnTy =
2371         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2372     RTLFn = CGM.CreateRuntimeFunction(
2373         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2374     break;
2375   }
2376   case OMPRTL__kmpc_alloc: {
2377     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2378     // al); omp_allocator_handle_t type is void *.
2379     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2380     auto *FnTy =
2381         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2382     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2383     break;
2384   }
2385   case OMPRTL__kmpc_free: {
2386     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2387     // al); omp_allocator_handle_t type is void *.
2388     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2389     auto *FnTy =
2390         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2391     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2392     break;
2393   }
2394   case OMPRTL__kmpc_push_target_tripcount: {
2395     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2396     // size);
2397     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2398     llvm::FunctionType *FnTy =
2399         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2400     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2401     break;
2402   }
2403   case OMPRTL__tgt_target: {
2404     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2405     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2406     // *arg_types);
2407     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408                                 CGM.VoidPtrTy,
2409                                 CGM.Int32Ty,
2410                                 CGM.VoidPtrPtrTy,
2411                                 CGM.VoidPtrPtrTy,
2412                                 CGM.Int64Ty->getPointerTo(),
2413                                 CGM.Int64Ty->getPointerTo()};
2414     auto *FnTy =
2415         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2416     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2417     break;
2418   }
2419   case OMPRTL__tgt_target_nowait: {
2420     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2421     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2422     // int64_t *arg_types);
2423     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2424                                 CGM.VoidPtrTy,
2425                                 CGM.Int32Ty,
2426                                 CGM.VoidPtrPtrTy,
2427                                 CGM.VoidPtrPtrTy,
2428                                 CGM.Int64Ty->getPointerTo(),
2429                                 CGM.Int64Ty->getPointerTo()};
2430     auto *FnTy =
2431         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2432     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2433     break;
2434   }
2435   case OMPRTL__tgt_target_teams: {
2436     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2437     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2438     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2439     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2440                                 CGM.VoidPtrTy,
2441                                 CGM.Int32Ty,
2442                                 CGM.VoidPtrPtrTy,
2443                                 CGM.VoidPtrPtrTy,
2444                                 CGM.Int64Ty->getPointerTo(),
2445                                 CGM.Int64Ty->getPointerTo(),
2446                                 CGM.Int32Ty,
2447                                 CGM.Int32Ty};
2448     auto *FnTy =
2449         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2450     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2451     break;
2452   }
2453   case OMPRTL__tgt_target_teams_nowait: {
2454     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2455     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2456     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2457     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2458                                 CGM.VoidPtrTy,
2459                                 CGM.Int32Ty,
2460                                 CGM.VoidPtrPtrTy,
2461                                 CGM.VoidPtrPtrTy,
2462                                 CGM.Int64Ty->getPointerTo(),
2463                                 CGM.Int64Ty->getPointerTo(),
2464                                 CGM.Int32Ty,
2465                                 CGM.Int32Ty};
2466     auto *FnTy =
2467         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2468     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2469     break;
2470   }
2471   case OMPRTL__tgt_register_requires: {
2472     // Build void __tgt_register_requires(int64_t flags);
2473     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2474     auto *FnTy =
2475         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2477     break;
2478   }
2479   case OMPRTL__tgt_target_data_begin: {
2480     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2481     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2482     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2483                                 CGM.Int32Ty,
2484                                 CGM.VoidPtrPtrTy,
2485                                 CGM.VoidPtrPtrTy,
2486                                 CGM.Int64Ty->getPointerTo(),
2487                                 CGM.Int64Ty->getPointerTo()};
2488     auto *FnTy =
2489         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2491     break;
2492   }
2493   case OMPRTL__tgt_target_data_begin_nowait: {
2494     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2495     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2496     // *arg_types);
2497     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2498                                 CGM.Int32Ty,
2499                                 CGM.VoidPtrPtrTy,
2500                                 CGM.VoidPtrPtrTy,
2501                                 CGM.Int64Ty->getPointerTo(),
2502                                 CGM.Int64Ty->getPointerTo()};
2503     auto *FnTy =
2504         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2505     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2506     break;
2507   }
2508   case OMPRTL__tgt_target_data_end: {
2509     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2510     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2511     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2512                                 CGM.Int32Ty,
2513                                 CGM.VoidPtrPtrTy,
2514                                 CGM.VoidPtrPtrTy,
2515                                 CGM.Int64Ty->getPointerTo(),
2516                                 CGM.Int64Ty->getPointerTo()};
2517     auto *FnTy =
2518         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2520     break;
2521   }
2522   case OMPRTL__tgt_target_data_end_nowait: {
2523     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2524     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2525     // *arg_types);
2526     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2527                                 CGM.Int32Ty,
2528                                 CGM.VoidPtrPtrTy,
2529                                 CGM.VoidPtrPtrTy,
2530                                 CGM.Int64Ty->getPointerTo(),
2531                                 CGM.Int64Ty->getPointerTo()};
2532     auto *FnTy =
2533         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2534     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2535     break;
2536   }
2537   case OMPRTL__tgt_target_data_update: {
2538     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2539     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2540     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2541                                 CGM.Int32Ty,
2542                                 CGM.VoidPtrPtrTy,
2543                                 CGM.VoidPtrPtrTy,
2544                                 CGM.Int64Ty->getPointerTo(),
2545                                 CGM.Int64Ty->getPointerTo()};
2546     auto *FnTy =
2547         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2549     break;
2550   }
2551   case OMPRTL__tgt_target_data_update_nowait: {
2552     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2553     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2554     // *arg_types);
2555     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2556                                 CGM.Int32Ty,
2557                                 CGM.VoidPtrPtrTy,
2558                                 CGM.VoidPtrPtrTy,
2559                                 CGM.Int64Ty->getPointerTo(),
2560                                 CGM.Int64Ty->getPointerTo()};
2561     auto *FnTy =
2562         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2563     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2564     break;
2565   }
2566   case OMPRTL__tgt_mapper_num_components: {
2567     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2568     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2569     auto *FnTy =
2570         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2571     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2572     break;
2573   }
2574   case OMPRTL__tgt_push_mapper_component: {
2575     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2576     // *base, void *begin, int64_t size, int64_t type);
2577     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2578                                 CGM.Int64Ty, CGM.Int64Ty};
2579     auto *FnTy =
2580         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2581     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2582     break;
2583   }
2584   }
2585   assert(RTLFn && "Unable to find OpenMP runtime function");
2586   return RTLFn;
2587 }
2588 
2589 llvm::FunctionCallee
2590 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2591   assert((IVSize == 32 || IVSize == 64) &&
2592          "IV size is not compatible with the omp runtime");
2593   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2594                                             : "__kmpc_for_static_init_4u")
2595                                 : (IVSigned ? "__kmpc_for_static_init_8"
2596                                             : "__kmpc_for_static_init_8u");
2597   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2598   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2599   llvm::Type *TypeParams[] = {
2600     getIdentTyPointerTy(),                     // loc
2601     CGM.Int32Ty,                               // tid
2602     CGM.Int32Ty,                               // schedtype
2603     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2604     PtrTy,                                     // p_lower
2605     PtrTy,                                     // p_upper
2606     PtrTy,                                     // p_stride
2607     ITy,                                       // incr
2608     ITy                                        // chunk
2609   };
2610   auto *FnTy =
2611       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2612   return CGM.CreateRuntimeFunction(FnTy, Name);
2613 }
2614 
2615 llvm::FunctionCallee
2616 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2617   assert((IVSize == 32 || IVSize == 64) &&
2618          "IV size is not compatible with the omp runtime");
2619   StringRef Name =
2620       IVSize == 32
2621           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2622           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2623   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2624   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2625                                CGM.Int32Ty,           // tid
2626                                CGM.Int32Ty,           // schedtype
2627                                ITy,                   // lower
2628                                ITy,                   // upper
2629                                ITy,                   // stride
2630                                ITy                    // chunk
2631   };
2632   auto *FnTy =
2633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2634   return CGM.CreateRuntimeFunction(FnTy, Name);
2635 }
2636 
2637 llvm::FunctionCallee
2638 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2639   assert((IVSize == 32 || IVSize == 64) &&
2640          "IV size is not compatible with the omp runtime");
2641   StringRef Name =
2642       IVSize == 32
2643           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2644           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2645   llvm::Type *TypeParams[] = {
2646       getIdentTyPointerTy(), // loc
2647       CGM.Int32Ty,           // tid
2648   };
2649   auto *FnTy =
2650       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2651   return CGM.CreateRuntimeFunction(FnTy, Name);
2652 }
2653 
2654 llvm::FunctionCallee
2655 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2656   assert((IVSize == 32 || IVSize == 64) &&
2657          "IV size is not compatible with the omp runtime");
2658   StringRef Name =
2659       IVSize == 32
2660           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2661           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2662   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2663   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2664   llvm::Type *TypeParams[] = {
2665     getIdentTyPointerTy(),                     // loc
2666     CGM.Int32Ty,                               // tid
2667     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2668     PtrTy,                                     // p_lower
2669     PtrTy,                                     // p_upper
2670     PtrTy                                      // p_stride
2671   };
2672   auto *FnTy =
2673       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2674   return CGM.CreateRuntimeFunction(FnTy, Name);
2675 }
2676 
2677 /// Obtain information that uniquely identifies a target entry. This
2678 /// consists of the file and device IDs as well as line number associated with
2679 /// the relevant entry source location.
2680 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2681                                      unsigned &DeviceID, unsigned &FileID,
2682                                      unsigned &LineNum) {
2683   SourceManager &SM = C.getSourceManager();
2684 
2685   // The loc should be always valid and have a file ID (the user cannot use
2686   // #pragma directives in macros)
2687 
2688   assert(Loc.isValid() && "Source location is expected to be always valid.");
2689 
2690   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2691   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2692 
2693   llvm::sys::fs::UniqueID ID;
2694   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2695     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2696         << PLoc.getFilename() << EC.message();
2697 
2698   DeviceID = ID.getDevice();
2699   FileID = ID.getFile();
2700   LineNum = PLoc.getLine();
2701 }
2702 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no offloading machinery is emitted at all.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A reference pointer is only created for 'declare target link' variables,
  // or for 'declare target to' variables when unified shared memory is
  // required; all other variables are accessed directly.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: "<mangled-name>[_<file-id>]_decl_tgt_ref_ptr".
    // The hex file ID disambiguates variables that are not externally
    // visible and could therefore collide across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer global lazily, the first time it is requested.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer starts out pointing at the original
      // variable; on the device the initializer is left to the runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2741 
2742 llvm::Constant *
2743 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2744   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2745          !CGM.getContext().getTargetInfo().isTLSSupported());
2746   // Lookup the entry, lazily creating it if necessary.
2747   std::string Suffix = getName({"cache", ""});
2748   return getOrCreateInternalVariable(
2749       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2750 }
2751 
2752 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2753                                                 const VarDecl *VD,
2754                                                 Address VDAddr,
2755                                                 SourceLocation Loc) {
2756   if (CGM.getLangOpts().OpenMPUseTLS &&
2757       CGM.getContext().getTargetInfo().isTLSSupported())
2758     return VDAddr;
2759 
2760   llvm::Type *VarTy = VDAddr.getElementType();
2761   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2762                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2763                                                        CGM.Int8PtrTy),
2764                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2765                          getOrCreateThreadPrivateCache(VD)};
2766   return Address(CGF.EmitRuntimeCall(
2767       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2768                  VDAddr.getAlignment());
2769 }
2770 
2771 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2772     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2773     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2774   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2775   // library.
2776   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2777   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2778                       OMPLoc);
2779   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2780   // to register constructor/destructor for variable.
2781   llvm::Value *Args[] = {
2782       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2783       Ctor, CopyCtor, Dtor};
2784   CGF.EmitRuntimeCall(
2785       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2786 }
2787 
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // When the target supports TLS natively, threadprivate variables are plain
  // TLS globals and need no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Emit the registration only once, for the variable's definition.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD.
      // Signature: void *ctor(void *dst) — receives the address of this
      // thread's copy and returns it.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and emit the initializer into it, with
      // the address reinterpreted as the variable's memory type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the same pointer that was passed in.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD.
      // Signature: void dtor(void *dst) — destroys this thread's copy.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime expects non-null function pointers of the right type; pass
    // typed null pointers for the hooks that were not generated.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a fresh global
      // initialization function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2907 
2908 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2909                                                      llvm::GlobalVariable *Addr,
2910                                                      bool PerformInit) {
2911   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2912       !CGM.getLangOpts().OpenMPIsDevice)
2913     return false;
2914   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2915       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2916   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2917       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2918        HasRequiresUnifiedSharedMemory))
2919     return CGM.getLangOpts().OpenMPIsDevice;
2920   VD = VD->getDefinition(CGM.getContext());
2921   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2922     return CGM.getLangOpts().OpenMPIsDevice;
2923 
2924   QualType ASTTy = VD->getType();
2925 
2926   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2927   // Produce the unique prefix to identify the new target regions. We use
2928   // the source location of the variable declaration which we know to not
2929   // conflict with any target region.
2930   unsigned DeviceID;
2931   unsigned FileID;
2932   unsigned Line;
2933   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2934   SmallString<128> Buffer, Out;
2935   {
2936     llvm::raw_svector_ostream OS(Buffer);
2937     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2938        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2939   }
2940 
2941   const Expr *Init = VD->getAnyInitializer();
2942   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2943     llvm::Constant *Ctor;
2944     llvm::Constant *ID;
2945     if (CGM.getLangOpts().OpenMPIsDevice) {
2946       // Generate function that re-emits the declaration's initializer into
2947       // the threadprivate copy of the variable VD
2948       CodeGenFunction CtorCGF(CGM);
2949 
2950       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2951       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2952       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2953           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2954       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2955       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2956                             FunctionArgList(), Loc, Loc);
2957       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2958       CtorCGF.EmitAnyExprToMem(Init,
2959                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2960                                Init->getType().getQualifiers(),
2961                                /*IsInitializer=*/true);
2962       CtorCGF.FinishFunction();
2963       Ctor = Fn;
2964       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2965       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2966     } else {
2967       Ctor = new llvm::GlobalVariable(
2968           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2969           llvm::GlobalValue::PrivateLinkage,
2970           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2971       ID = Ctor;
2972     }
2973 
2974     // Register the information for the entry associated with the constructor.
2975     Out.clear();
2976     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2977         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2978         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2979   }
2980   if (VD->getType().isDestructedType() != QualType::DK_none) {
2981     llvm::Constant *Dtor;
2982     llvm::Constant *ID;
2983     if (CGM.getLangOpts().OpenMPIsDevice) {
2984       // Generate function that emits destructor call for the threadprivate
2985       // copy of the variable VD
2986       CodeGenFunction DtorCGF(CGM);
2987 
2988       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2989       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2990       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2991           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2992       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2993       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2994                             FunctionArgList(), Loc, Loc);
2995       // Create a scope with an artificial location for the body of this
2996       // function.
2997       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2998       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2999                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
3000                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
3001       DtorCGF.FinishFunction();
3002       Dtor = Fn;
3003       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
3004       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
3005     } else {
3006       Dtor = new llvm::GlobalVariable(
3007           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3008           llvm::GlobalValue::PrivateLinkage,
3009           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
3010       ID = Dtor;
3011     }
3012     // Register the information for the entry associated with the destructor.
3013     Out.clear();
3014     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3015         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
3016         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
3017   }
3018   return CGM.getLangOpts().OpenMPIsDevice;
3019 }
3020 
3021 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
3022                                                           QualType VarType,
3023                                                           StringRef Name) {
3024   std::string Suffix = getName({"artificial", ""});
3025   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3026   llvm::Value *GAddr =
3027       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3028   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3029       CGM.getTarget().isTLSSupported()) {
3030     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3031     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3032   }
3033   std::string CacheSuffix = getName({"cache", ""});
3034   llvm::Value *Args[] = {
3035       emitUpdateLocation(CGF, SourceLocation()),
3036       getThreadID(CGF, SourceLocation()),
3037       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3038       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3039                                 /*isSigned=*/false),
3040       getOrCreateInternalVariable(
3041           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3042   return Address(
3043       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3044           CGF.EmitRuntimeCall(
3045               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3046           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3047       CGM.getContext().getTypeAlignInChars(VarType));
3048 }
3049 
3050 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3051                                    const RegionCodeGenTy &ThenGen,
3052                                    const RegionCodeGenTy &ElseGen) {
3053   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3054 
3055   // If the condition constant folds and can be elided, try to avoid emitting
3056   // the condition and the dead arm of the if/else.
3057   bool CondConstant;
3058   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3059     if (CondConstant)
3060       ThenGen(CGF);
3061     else
3062       ElseGen(CGF);
3063     return;
3064   }
3065 
3066   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3067   // emit the conditional branch.
3068   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3069   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3070   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3071   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3072 
3073   // Emit the 'then' code.
3074   CGF.EmitBlock(ThenBlock);
3075   ThenGen(CGF);
3076   CGF.EmitBranch(ContBlock);
3077   // Emit the 'else' code if present.
3078   // There is no need to emit line number for unconditional branch.
3079   (void)ApplyDebugLocation::CreateEmpty(CGF);
3080   CGF.EmitBlock(ElseBlock);
3081   ElseGen(CGF);
3082   // There is no need to emit line number for unconditional branch.
3083   (void)ApplyDebugLocation::CreateEmpty(CGF);
3084   CGF.EmitBranch(ContBlock);
3085   // Emit the continuation block for code after the if.
3086   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3087 }
3088 
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Code emitted when the region runs in parallel: fork via the runtime,
  // passing the outlined function and its captured variables.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Code emitted for the serialized case (if-clause evaluating to false):
  // run the outlined function on the current thread, bracketed by
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an if-clause, choose between the two at runtime; otherwise always
  // emit the forking path.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3148 
3149 // If we're inside an (outlined) parallel region, use the region info's
3150 // thread-ID variable (it is passed in a first argument of the outlined function
3151 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3152 // regular serial code region, get thread ID by calling kmp_int32
3153 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3154 // return the address of that temp.
3155 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3156                                              SourceLocation Loc) {
3157   if (auto *OMPRegionInfo =
3158           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3159     if (OMPRegionInfo->getThreadIDVariable())
3160       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3161 
3162   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3163   QualType Int32Ty =
3164       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3165   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3166   CGF.EmitStoreOfScalar(ThreadID,
3167                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3168 
3169   return ThreadIDTemp;
3170 }
3171 
3172 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3173     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3174   SmallString<256> Buffer;
3175   llvm::raw_svector_ostream Out(Buffer);
3176   Out << Name;
3177   StringRef RuntimeName = Out.str();
3178   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3179   if (Elem.second) {
3180     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3181            "OMP internal variable has different type than requested");
3182     return &*Elem.second;
3183   }
3184 
3185   return Elem.second = new llvm::GlobalVariable(
3186              CGM.getModule(), Ty, /*IsConstant*/ false,
3187              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3188              Elem.first(), /*InsertBefore=*/nullptr,
3189              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3190 }
3191 
3192 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3193   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3194   std::string Name = getName({Prefix, "var"});
3195   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3196 }
3197 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits a runtime "enter" call before the region body and an "exit" call
/// after it. When \p Conditional is true, the body is guarded by the enter
/// call's result ("if (enter() != 0) { body; exit(); }") and the caller must
/// invoke Done() after emitting the region to close the conditional block.
class CommonActionTy final : public PrePostActionTy {
  // Runtime function invoked before the region (e.g. __kmpc_master).
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  // Runtime function invoked after the region (e.g. __kmpc_end_master).
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // Whether the region body is emitted only if the enter call returns
  // nonzero.
  bool Conditional;
  // Continuation block of the conditional form; set by Enter(), consumed by
  // Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Closes the conditional region opened by Enter(). Only meaningful when
  // Conditional is true — ContBlock is null otherwise.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
3236 
3237 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3238                                          StringRef CriticalName,
3239                                          const RegionCodeGenTy &CriticalOpGen,
3240                                          SourceLocation Loc, const Expr *Hint) {
3241   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3242   // CriticalOpGen();
3243   // __kmpc_end_critical(ident_t *, gtid, Lock);
3244   // Prepare arguments and build a call to __kmpc_critical
3245   if (!CGF.HaveInsertPoint())
3246     return;
3247   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3248                          getCriticalRegionLock(CriticalName)};
3249   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3250                                                 std::end(Args));
3251   if (Hint) {
3252     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3253         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3254   }
3255   CommonActionTy Action(
3256       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3257                                  : OMPRTL__kmpc_critical),
3258       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3259   CriticalOpGen.setAction(Action);
3260   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3261 }
3262 
3263 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3264                                        const RegionCodeGenTy &MasterOpGen,
3265                                        SourceLocation Loc) {
3266   if (!CGF.HaveInsertPoint())
3267     return;
3268   // if(__kmpc_master(ident_t *, gtid)) {
3269   //   MasterOpGen();
3270   //   __kmpc_end_master(ident_t *, gtid);
3271   // }
3272   // Prepare arguments and build a call to __kmpc_master
3273   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3274   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3275                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3276                         /*Conditional=*/true);
3277   MasterOpGen.setAction(Action);
3278   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3279   Action.Done(CGF);
3280 }
3281 
3282 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3283                                         SourceLocation Loc) {
3284   if (!CGF.HaveInsertPoint())
3285     return;
3286   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3287   llvm::Value *Args[] = {
3288       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3289       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3290   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3291   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3292     Region->emitUntiedSwitch(CGF);
3293 }
3294 
3295 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3296                                           const RegionCodeGenTy &TaskgroupOpGen,
3297                                           SourceLocation Loc) {
3298   if (!CGF.HaveInsertPoint())
3299     return;
3300   // __kmpc_taskgroup(ident_t *, gtid);
3301   // TaskgroupOpGen();
3302   // __kmpc_end_taskgroup(ident_t *, gtid);
3303   // Prepare arguments and build a call to __kmpc_taskgroup
3304   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3305   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3306                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3307                         Args);
3308   TaskgroupOpGen.setAction(Action);
3309   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3310 }
3311 
3312 /// Given an array of pointers to variables, project the address of a
3313 /// given variable.
3314 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3315                                       unsigned Index, const VarDecl *Var) {
3316   // Pull out the pointer to the variable.
3317   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3318   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3319 
3320   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3321   Addr = CGF.Builder.CreateElementBitCast(
3322       Addr, CGF.ConvertTypeForMem(Var->getType()));
3323   return Addr;
3324 }
3325 
/// Emit an internal helper
///   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)
/// that, given two void*[n] arrays of variable addresses (destination in
/// LHSArg, source in RHSArg), performs the per-variable copyprivate
/// assignment for a 'single' construct.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced by the __kmpc_copyprivate
  // call emitted in this module.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper body with a fresh CodeGenFunction so the caller's
  // insertion point is untouched.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Cast the opaque void* parameters back to pointers to void*[n]:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // One assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    // EmitOMPCopy performs either a trivial copy or the user-defined
    // assignment represented by AssignmentOps[I].
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3379 
/// Emit a 'single' region, optionally followed by a copyprivate broadcast of
/// the listed variables to the other threads of the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four clause arrays are parallel: one entry per copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall emitted shape:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // did_it records whether this thread executed the single region (it is
    // set to 1 only inside the conditional branch below).
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the body and the end-call are emitted under the
  // 'if (__kmpc_single(...))' branch.
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional branch)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional branch before emitting the copyprivate call, which
  // must execute on every thread.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed to the helper's
    // (DestExprs, SrcExprs) parameters — the naming looks swapped between
    // caller and callee; confirm against the pseudo-variables built by Sema.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3461 
3462 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3463                                         const RegionCodeGenTy &OrderedOpGen,
3464                                         SourceLocation Loc, bool IsThreads) {
3465   if (!CGF.HaveInsertPoint())
3466     return;
3467   // __kmpc_ordered(ident_t *, gtid);
3468   // OrderedOpGen();
3469   // __kmpc_end_ordered(ident_t *, gtid);
3470   // Prepare arguments and build a call to __kmpc_ordered
3471   if (IsThreads) {
3472     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3473     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3474                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3475                           Args);
3476     OrderedOpGen.setAction(Action);
3477     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3478     return;
3479   }
3480   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3481 }
3482 
3483 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3484   unsigned Flags;
3485   if (Kind == OMPD_for)
3486     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3487   else if (Kind == OMPD_sections)
3488     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3489   else if (Kind == OMPD_single)
3490     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3491   else if (Kind == OMPD_barrier)
3492     Flags = OMP_IDENT_BARRIER_EXPL;
3493   else
3494     Flags = OMP_IDENT_BARRIER_IMPL;
3495   return Flags;
3496 }
3497 
/// Pick a default schedule/chunk for \p S, overriding the out-parameters only
/// when the loop is a doacross loop; otherwise they are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  // An 'ordered' clause with a for-loop count marks a doacross loop.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    // Synthesize an unsigned 32-bit integer literal '1' as the chunk expr.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
3515 
/// Emit a barrier at \p Loc. When the OpenMPIRBuilder is enabled, delegate to
/// it; otherwise emit __kmpc_barrier, or __kmpc_cancel_barrier plus the
/// cancellation-check branch when the region supports cancellation.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    // The builder emits the whole barrier; continue at the insertion point it
    // returns.
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Cancellable regions use the cancel-aware barrier whose result says
    // whether cancellation was observed.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through cleanups to the region's cancellation destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3562 
3563 /// Map the OpenMP loop schedule to the runtime enumeration.
3564 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3565                                           bool Chunked, bool Ordered) {
3566   switch (ScheduleKind) {
3567   case OMPC_SCHEDULE_static:
3568     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3569                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3570   case OMPC_SCHEDULE_dynamic:
3571     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3572   case OMPC_SCHEDULE_guided:
3573     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3574   case OMPC_SCHEDULE_runtime:
3575     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3576   case OMPC_SCHEDULE_auto:
3577     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3578   case OMPC_SCHEDULE_unknown:
3579     assert(!Chunked && "chunk was specified but schedule kind not known");
3580     return Ordered ? OMP_ord_static : OMP_sch_static;
3581   }
3582   llvm_unreachable("Unexpected runtime schedule");
3583 }
3584 
3585 /// Map the OpenMP distribute schedule to the runtime enumeration.
3586 static OpenMPSchedType
3587 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3588   // only static is allowed for dist_schedule
3589   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3590 }
3591 
3592 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3593                                          bool Chunked) const {
3594   OpenMPSchedType Schedule =
3595       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3596   return Schedule == OMP_sch_static;
3597 }
3598 
3599 bool CGOpenMPRuntime::isStaticNonchunked(
3600     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3601   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3602   return Schedule == OMP_dist_sch_static;
3603 }
3604 
3605 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3606                                       bool Chunked) const {
3607   OpenMPSchedType Schedule =
3608       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3609   return Schedule == OMP_sch_static_chunked;
3610 }
3611 
3612 bool CGOpenMPRuntime::isStaticChunked(
3613     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3614   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3615   return Schedule == OMP_dist_sch_static_chunked;
3616 }
3617 
3618 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3619   OpenMPSchedType Schedule =
3620       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3621   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3622   return Schedule != OMP_sch_static;
3623 }
3624 
3625 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3626                                   OpenMPScheduleClauseModifier M1,
3627                                   OpenMPScheduleClauseModifier M2) {
3628   int Modifier = 0;
3629   switch (M1) {
3630   case OMPC_SCHEDULE_MODIFIER_monotonic:
3631     Modifier = OMP_sch_modifier_monotonic;
3632     break;
3633   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3634     Modifier = OMP_sch_modifier_nonmonotonic;
3635     break;
3636   case OMPC_SCHEDULE_MODIFIER_simd:
3637     if (Schedule == OMP_sch_static_chunked)
3638       Schedule = OMP_sch_static_balanced_chunked;
3639     break;
3640   case OMPC_SCHEDULE_MODIFIER_last:
3641   case OMPC_SCHEDULE_MODIFIER_unknown:
3642     break;
3643   }
3644   switch (M2) {
3645   case OMPC_SCHEDULE_MODIFIER_monotonic:
3646     Modifier = OMP_sch_modifier_monotonic;
3647     break;
3648   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3649     Modifier = OMP_sch_modifier_nonmonotonic;
3650     break;
3651   case OMPC_SCHEDULE_MODIFIER_simd:
3652     if (Schedule == OMP_sch_static_chunked)
3653       Schedule = OMP_sch_static_balanced_chunked;
3654     break;
3655   case OMPC_SCHEDULE_MODIFIER_last:
3656   case OMPC_SCHEDULE_MODIFIER_unknown:
3657     break;
3658   }
3659   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3660   // If the static schedule kind is specified or if the ordered clause is
3661   // specified, and if the nonmonotonic modifier is not specified, the effect is
3662   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3663   // modifier is specified, the effect is as if the nonmonotonic modifier is
3664   // specified.
3665   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3666     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3667           Schedule == OMP_sch_static_balanced_chunked ||
3668           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3669           Schedule == OMP_dist_sch_static_chunked ||
3670           Schedule == OMP_dist_sch_static))
3671       Modifier = OMP_sch_modifier_nonmonotonic;
3672   }
3673   return Schedule | Modifier;
3674 }
3675 
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop with bounds/chunk taken from \p DispatchValues.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must go through emitForStaticInit instead, unless the
  // loop is ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3708 
/// Shared helper for emitForStaticInit/emitDistributeStaticInit: emit the
/// __kmpc_for_static_init_* call for a statically scheduled loop, with bounds
/// pointers and chunk taken from \p Values.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops and non-static schedules must not reach this path.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3757 
/// Emit the static-init runtime call for a worksharing loop or sections
/// directive (distribute has its own entry point below).
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the location with the kind of worksharing construct.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
3777 
/// Emit the static-init runtime call for a 'distribute' directive. Distribute
/// has no schedule modifiers, so both are passed as 'unknown'.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
3793 
3794 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3795                                           SourceLocation Loc,
3796                                           OpenMPDirectiveKind DKind) {
3797   if (!CGF.HaveInsertPoint())
3798     return;
3799   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3800   llvm::Value *Args[] = {
3801       emitUpdateLocation(CGF, Loc,
3802                          isOpenMPDistributeDirective(DKind)
3803                              ? OMP_IDENT_WORK_DISTRIBUTE
3804                              : isOpenMPLoopDirective(DKind)
3805                                    ? OMP_IDENT_WORK_LOOP
3806                                    : OMP_IDENT_WORK_SECTIONS),
3807       getThreadID(CGF, Loc)};
3808   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3809                       Args);
3810 }
3811 
/// Emit the dispatch-fini call that ends one ordered iteration of a
/// dynamically scheduled loop.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
3822 
/// Emit the __kmpc_dispatch_next_* call that fetches the next chunk of a
/// dynamically scheduled loop into the given bounds/stride locations, and
/// return the runtime result converted to a boolean.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // Narrow the runtime's kmp_int32 result to the bool the caller branches on.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
3846 
/// Emit __kmpc_push_num_threads to record the 'num_threads' clause value for
/// the next parallel region.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  // The runtime expects a 32-bit value; truncate/extend as signed.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}
3859 
/// Emit __kmpc_push_proc_bind to record the 'proc_bind' clause value for the
/// next parallel region.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  // The enum value is passed to the runtime as a plain integer.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}
3872 
/// Emit a 'flush' as a call to __kmpc_flush. The variable list argument is
/// accepted but unused: the runtime call flushes unconditionally.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}
3881 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the order here is assumed to mirror the field layout of the
/// runtime's kmp_task_t record built elsewhere in this file — keep in sync.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3907 
3908 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3909   return OffloadEntriesTargetRegion.empty() &&
3910          OffloadEntriesDeviceGlobalVar.empty();
3911 }
3912 
/// Initialize target region entry.
/// Device-side only: creates a placeholder entry (null address/ID) at the
/// given (DeviceID, FileID, ParentName, LineNum) key with ordinal \p Order.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3926 
/// Register a target region entry: on the device, fill in a previously
/// initialized placeholder; on the host, create a fresh entry with the next
/// ordinal.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // A missing (or already-registered) entry is a user-visible mismatch
    // between host and device code, so diagnose instead of asserting.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: entries are created on the fly with the next order number.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3954 
/// Return true only if an *initialized but not yet registered* target region
/// entry exists at the given key. Note the final check: an entry that already
/// has an address or ID also yields false.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  // Walk the DeviceID -> FileID -> ParentName -> LineNum nesting level by
  // level; any missing level means no entry.
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}
3975 
3976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3977     const OffloadTargetRegionEntryInfoActTy &Action) {
3978   // Scan all target region entries and perform the provided action.
3979   for (const auto &D : OffloadEntriesTargetRegion)
3980     for (const auto &F : D.second)
3981       for (const auto &P : F.second)
3982         for (const auto &L : P.second)
3983           Action(D.first, F.first, P.first(), L.first, L.second);
3984 }
3985 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  // Reserve a slot (with its host-assigned order number) for a declare-target
  // global variable; address/size/linkage are filled in later by
  // registerDeviceGlobalVarEntryInfo().
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3996 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  // Record (or complete) the offload entry for a declare-target global
  // variable. On the device the slot must already have been created by
  // initializeDeviceGlobalVarEntryInfo(); on the host a new entry is
  // appended if none exists yet.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Note: operator[] default-constructs an entry when VarName is absent;
    // the assert below then fires on !isValid() in debug builds.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry was already registered; only fill in size/linkage if they were
      // previously unknown.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Duplicate registration on the host: update only missing size/linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First registration of this variable on the host: create the entry with
    // the next sequential order number.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
4036 
4037 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4038     actOnDeviceGlobalVarEntriesInfo(
4039         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4040   // Scan all target region entries and perform the provided action.
4041   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4042     Action(E.getKey(), E.getValue());
4043 }
4044 
4045 void CGOpenMPRuntime::createOffloadEntry(
4046     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4047     llvm::GlobalValue::LinkageTypes Linkage) {
4048   StringRef Name = Addr->getName();
4049   llvm::Module &M = CGM.getModule();
4050   llvm::LLVMContext &C = M.getContext();
4051 
4052   // Create constant string with the name.
4053   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4054 
4055   std::string StringName = getName({"omp_offloading", "entry_name"});
4056   auto *Str = new llvm::GlobalVariable(
4057       M, StrPtrInit->getType(), /*isConstant=*/true,
4058       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4059   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4060 
4061   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4062                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4063                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4064                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4065                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4066   std::string EntryName = getName({"omp_offloading", "entry", ""});
4067   llvm::GlobalVariable *Entry = createGlobalStruct(
4068       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4069       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4070 
4071   // The entry has to be created in the section the linker expects it to be.
4072   Entry->setSection("omp_offloading_entries");
4073 }
4074 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are indexed by their creation order so that the host and device
  // sides agree on the numbering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Map the (device, file) ID pair back to a SourceLocation so the
        // diagnostics below can point at the target region.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit a __tgt_offload_entry for every collected entry, diagnosing entries
  // that were initialized but never fully registered.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are skipped on the device when unified shared memory
        // is required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4248 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata node
  // is needed from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands of a single metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the layout of the remaining operands
    // depends on it (see createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4317 
4318 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4319   if (!KmpRoutineEntryPtrTy) {
4320     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4321     ASTContext &C = CGM.getContext();
4322     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4323     FunctionProtoType::ExtProtoInfo EPI;
4324     KmpRoutineEntryPtrQTy = C.getPointerType(
4325         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4326     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4327   }
4328 }
4329 
4330 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4331   // Make sure the type of the entry is already created. This is the type we
4332   // have to create:
4333   // struct __tgt_offload_entry{
4334   //   void      *addr;       // Pointer to the offload entry info.
4335   //                          // (function or global)
4336   //   char      *name;       // Name of the function or global.
4337   //   size_t     size;       // Size of the entry info (0 if it a function).
4338   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4339   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4340   // };
4341   if (TgtOffloadEntryQTy.isNull()) {
4342     ASTContext &C = CGM.getContext();
4343     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4344     RD->startDefinition();
4345     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4346     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4347     addFieldToRecordDecl(C, RD, C.getSizeType());
4348     addFieldToRecordDecl(
4349         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4350     addFieldToRecordDecl(
4351         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4352     RD->completeDefinition();
4353     RD->addAttr(PackedAttr::CreateImplicit(C));
4354     TgtOffloadEntryQTy = C.getRecordType(RD);
4355   }
4356   return TgtOffloadEntryQTy;
4357 }
4358 
4359 namespace {
4360 struct PrivateHelpersTy {
4361   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4362                    const VarDecl *PrivateElemInit)
4363       : Original(Original), PrivateCopy(PrivateCopy),
4364         PrivateElemInit(PrivateElemInit) {}
4365   const VarDecl *Original;
4366   const VarDecl *PrivateCopy;
4367   const VarDecl *PrivateElemInit;
4368 };
4369 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4370 } // anonymous namespace
4371 
4372 static RecordDecl *
4373 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4374   if (!Privates.empty()) {
4375     ASTContext &C = CGM.getContext();
4376     // Build struct .kmp_privates_t. {
4377     //         /*  private vars  */
4378     //       };
4379     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4380     RD->startDefinition();
4381     for (const auto &Pair : Privates) {
4382       const VarDecl *VD = Pair.second.Original;
4383       QualType Type = VD->getType().getNonReferenceType();
4384       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4385       if (VD->hasAttrs()) {
4386         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4387              E(VD->getAttrs().end());
4388              I != E; ++I)
4389           FD->addAttr(*I);
4390       }
4391     }
4392     RD->completeDefinition();
4393     return RD;
4394   }
4395   return nullptr;
4396 }
4397 
4398 static RecordDecl *
4399 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4400                          QualType KmpInt32Ty,
4401                          QualType KmpRoutineEntryPointerQTy) {
4402   ASTContext &C = CGM.getContext();
4403   // Build struct kmp_task_t {
4404   //         void *              shareds;
4405   //         kmp_routine_entry_t routine;
4406   //         kmp_int32           part_id;
4407   //         kmp_cmplrdata_t data1;
4408   //         kmp_cmplrdata_t data2;
4409   // For taskloops additional fields:
4410   //         kmp_uint64          lb;
4411   //         kmp_uint64          ub;
4412   //         kmp_int64           st;
4413   //         kmp_int32           liter;
4414   //         void *              reductions;
4415   //       };
4416   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4417   UD->startDefinition();
4418   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4419   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4420   UD->completeDefinition();
4421   QualType KmpCmplrdataTy = C.getRecordType(UD);
4422   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4423   RD->startDefinition();
4424   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4425   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4426   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4427   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4428   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4429   if (isOpenMPTaskLoopDirective(Kind)) {
4430     QualType KmpUInt64Ty =
4431         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4432     QualType KmpInt64Ty =
4433         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4434     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4435     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4436     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4437     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4438     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4439   }
4440   RD->completeDefinition();
4441   return RD;
4442 }
4443 
4444 static RecordDecl *
4445 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4446                                      ArrayRef<PrivateDataTy> Privates) {
4447   ASTContext &C = CGM.getContext();
4448   // Build struct kmp_task_t_with_privates {
4449   //         kmp_task_t task_data;
4450   //         .kmp_privates_t. privates;
4451   //       };
4452   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4453   RD->startDefinition();
4454   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4455   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4456     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4457   RD->completeDefinition();
4458   return RD;
4459 }
4460 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase points at the whole kmp_task_t_with_privates; Base at its first
  // field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the callee can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) is present only when there are privates;
  // pass a null pointer otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally forward lb/ub/st/liter/reductions.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4575 
4576 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4577                                             SourceLocation Loc,
4578                                             QualType KmpInt32Ty,
4579                                             QualType KmpTaskTWithPrivatesPtrQTy,
4580                                             QualType KmpTaskTWithPrivatesQTy) {
4581   ASTContext &C = CGM.getContext();
4582   FunctionArgList Args;
4583   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4584                             ImplicitParamDecl::Other);
4585   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4586                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4587                                 ImplicitParamDecl::Other);
4588   Args.push_back(&GtidArg);
4589   Args.push_back(&TaskTypeArg);
4590   const auto &DestructorFnInfo =
4591       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4592   llvm::FunctionType *DestructorFnTy =
4593       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4594   std::string Name =
4595       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4596   auto *DestructorFn =
4597       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4598                              Name, &CGM.getModule());
4599   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4600                                     DestructorFnInfo);
4601   DestructorFn->setDoesNotRecurse();
4602   CodeGenFunction CGF(CGM);
4603   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4604                     Args, Loc, Loc);
4605 
4606   LValue Base = CGF.EmitLoadOfPointerLValue(
4607       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4608       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4609   const auto *KmpTaskTWithPrivatesQTyRD =
4610       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4611   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4612   Base = CGF.EmitLValueForField(Base, *FI);
4613   for (const auto *Field :
4614        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4615     if (QualType::DestructionKind DtorKind =
4616             Field->getType().isDestructedType()) {
4617       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4618       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4619     }
4620   }
4621   CGF.FinishFunction();
4622   return DestructorFn;
4623 }
4624 
4625 /// Emit a privates mapping function for correct handling of private and
4626 /// firstprivate variables.
4627 /// \code
4628 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4629 /// **noalias priv1,...,  <tyn> **noalias privn) {
4630 ///   *priv1 = &.privates.priv1;
4631 ///   ...;
4632 ///   *privn = &.privates.privn;
4633 /// }
4634 /// \endcode
4635 static llvm::Value *
4636 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4637                                ArrayRef<const Expr *> PrivateVars,
4638                                ArrayRef<const Expr *> FirstprivateVars,
4639                                ArrayRef<const Expr *> LastprivateVars,
4640                                QualType PrivatesQTy,
4641                                ArrayRef<PrivateDataTy> Privates) {
4642   ASTContext &C = CGM.getContext();
4643   FunctionArgList Args;
4644   ImplicitParamDecl TaskPrivatesArg(
4645       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4646       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4647       ImplicitParamDecl::Other);
4648   Args.push_back(&TaskPrivatesArg);
4649   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4650   unsigned Counter = 1;
4651   for (const Expr *E : PrivateVars) {
4652     Args.push_back(ImplicitParamDecl::Create(
4653         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4654         C.getPointerType(C.getPointerType(E->getType()))
4655             .withConst()
4656             .withRestrict(),
4657         ImplicitParamDecl::Other));
4658     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4659     PrivateVarsPos[VD] = Counter;
4660     ++Counter;
4661   }
4662   for (const Expr *E : FirstprivateVars) {
4663     Args.push_back(ImplicitParamDecl::Create(
4664         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4665         C.getPointerType(C.getPointerType(E->getType()))
4666             .withConst()
4667             .withRestrict(),
4668         ImplicitParamDecl::Other));
4669     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4670     PrivateVarsPos[VD] = Counter;
4671     ++Counter;
4672   }
4673   for (const Expr *E : LastprivateVars) {
4674     Args.push_back(ImplicitParamDecl::Create(
4675         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4676         C.getPointerType(C.getPointerType(E->getType()))
4677             .withConst()
4678             .withRestrict(),
4679         ImplicitParamDecl::Other));
4680     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681     PrivateVarsPos[VD] = Counter;
4682     ++Counter;
4683   }
4684   const auto &TaskPrivatesMapFnInfo =
4685       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4686   llvm::FunctionType *TaskPrivatesMapTy =
4687       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4688   std::string Name =
4689       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4690   auto *TaskPrivatesMap = llvm::Function::Create(
4691       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4692       &CGM.getModule());
4693   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4694                                     TaskPrivatesMapFnInfo);
4695   if (CGM.getLangOpts().Optimize) {
4696     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4697     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4698     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4699   }
4700   CodeGenFunction CGF(CGM);
4701   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4702                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4703 
4704   // *privi = &.privates.privi;
4705   LValue Base = CGF.EmitLoadOfPointerLValue(
4706       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4707       TaskPrivatesArg.getType()->castAs<PointerType>());
4708   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4709   Counter = 0;
4710   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4711     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4712     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4713     LValue RefLVal =
4714         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4715     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4716         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4717     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4718     ++Counter;
4719   }
4720   CGF.FinishFunction();
4721   return TaskPrivatesMap;
4722 }
4723 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block of the task, used as
/// the copy source for firstprivate variables; may be Address::invalid() when
/// there is nothing to copy from.
/// \param TDBase Base lvalue of the kmp_task_t_with_privates object.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the privates
/// struct being initialized here.
/// \param ForDup true when emitting inside the task duplication routine; in
/// that case only non-trivial constructor-based initializers are re-run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates struct is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates struct in lockstep with Privates (both
  // are in the same, alignment-sorted order).
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup routine (ForDup) only re-run constructor-based,
    // non-trivial initializers; everything else was handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize from the captured shared copy.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Artificial target-data variable: not captured, so take its local
          // address directly.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          // Re-wrap with the original declaration's alignment, preserving
          // TBAA info from the field lvalue.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the source element, then
          // emit the initializer as-if for the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: just run its initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4830 
4831 /// Check if duplication function is required for taskloops.
4832 static bool checkInitIsRequired(CodeGenFunction &CGF,
4833                                 ArrayRef<PrivateDataTy> Privates) {
4834   bool InitRequired = false;
4835   for (const PrivateDataTy &Pair : Privates) {
4836     const VarDecl *VD = Pair.second.PrivateCopy;
4837     const Expr *Init = VD->getAnyInitializer();
4838     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4839                                     !CGF.isTrivialInitializer(Init));
4840     if (InitRequired)
4841       break;
4842   }
4843   return InitRequired;
4844 }
4845 
4846 
4847 /// Emit task_dup function (for initialization of
4848 /// private/firstprivate/lastprivate vars and last_iter flag)
4849 /// \code
4850 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4851 /// lastpriv) {
4852 /// // setup lastprivate flag
4853 ///    task_dst->last = lastpriv;
4854 /// // could be constructor calls here...
4855 /// }
4856 /// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void .omp_task_dup.(kmp_task_t *dst, kmp_task_t *src,
  //                                int lastpriv);
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the *source* task's shareds, so load the
  // shareds pointer out of SrcArg (not DstArg).
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // ForDup=true: only non-trivial constructor-based initializers are re-run
  // inside the duplication routine.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4925 
4926 /// Checks if destructor function is required to be generated.
4927 /// \return true if cleanups are required, false otherwise.
4928 static bool
4929 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4930   bool NeedsCleanup = false;
4931   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4932   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4933   for (const FieldDecl *FD : PrivateRD->fields()) {
4934     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4935     if (NeedsCleanup)
4936       break;
4937   }
4938   return NeedsCleanup;
4939 }
4940 
// Allocates and initializes a kmp_task_t object for a task-based directive:
// collects and alignment-sorts the private copies, builds the
// kmp_task_t_with_privates record, emits the helper thunks (privates mapping,
// proxy entry, optional task_dup and destructors), calls
// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc), and fills in the
// resulting task object.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // private: no element initializer.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // firstprivate: carries the element-init variable used to copy from the
  // shared original.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // lastprivate: no element initializer.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable sort keeps the declaration order within equal alignments.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop uses a different,
  // separately cached record type than task/target.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // final(expr): if the condition is dynamic, select the flag at runtime;
  // otherwise it was folded to a constant at compile time.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // nowait target tasks use the __kmpc_omp_target_task_alloc entry point,
  // which takes an extra device ID argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop additionally needs a task_dup routine when lastprivates or
    // non-trivial private initializers are present.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5160 
5161 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5162                                    const OMPExecutableDirective &D,
5163                                    llvm::Function *TaskFunction,
5164                                    QualType SharedsTy, Address Shareds,
5165                                    const Expr *IfCond,
5166                                    const OMPTaskDataTy &Data) {
5167   if (!CGF.HaveInsertPoint())
5168     return;
5169 
5170   TaskResultTy Result =
5171       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5172   llvm::Value *NewTask = Result.NewTask;
5173   llvm::Function *TaskEntry = Result.TaskEntry;
5174   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5175   LValue TDBase = Result.TDBase;
5176   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5177   ASTContext &C = CGM.getContext();
5178   // Process list of dependences.
5179   Address DependenciesArray = Address::invalid();
5180   unsigned NumDependencies = Data.Dependences.size();
5181   if (NumDependencies) {
5182     // Dependence kind for RTL.
5183     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5184     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5185     RecordDecl *KmpDependInfoRD;
5186     QualType FlagsTy =
5187         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5188     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5189     if (KmpDependInfoTy.isNull()) {
5190       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5191       KmpDependInfoRD->startDefinition();
5192       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5193       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5194       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5195       KmpDependInfoRD->completeDefinition();
5196       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5197     } else {
5198       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5199     }
5200     // Define type kmp_depend_info[<Dependences.size()>];
5201     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5202         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5203         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5204     // kmp_depend_info[<Dependences.size()>] deps;
5205     DependenciesArray =
5206         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5207     for (unsigned I = 0; I < NumDependencies; ++I) {
5208       const Expr *E = Data.Dependences[I].second;
5209       LValue Addr = CGF.EmitLValue(E);
5210       llvm::Value *Size;
5211       QualType Ty = E->getType();
5212       if (const auto *ASE =
5213               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5214         LValue UpAddrLVal =
5215             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5216         llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5217             UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5218         llvm::Value *LowIntPtr =
5219             CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
5220         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5221         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5222       } else {
5223         Size = CGF.getTypeSize(Ty);
5224       }
5225       LValue Base = CGF.MakeAddrLValue(
5226           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5227           KmpDependInfoTy);
5228       // deps[i].base_addr = &<Dependences[i].second>;
5229       LValue BaseAddrLVal = CGF.EmitLValueForField(
5230           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5231       CGF.EmitStoreOfScalar(
5232           CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
5233           BaseAddrLVal);
5234       // deps[i].len = sizeof(<Dependences[i].second>);
5235       LValue LenLVal = CGF.EmitLValueForField(
5236           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5237       CGF.EmitStoreOfScalar(Size, LenLVal);
5238       // deps[i].flags = <Dependences[i].first>;
5239       RTLDependenceKindTy DepKind;
5240       switch (Data.Dependences[I].first) {
5241       case OMPC_DEPEND_in:
5242         DepKind = DepIn;
5243         break;
5244       // Out and InOut dependencies must use the same code.
5245       case OMPC_DEPEND_out:
5246       case OMPC_DEPEND_inout:
5247         DepKind = DepInOut;
5248         break;
5249       case OMPC_DEPEND_mutexinoutset:
5250         DepKind = DepMutexInOutSet;
5251         break;
5252       case OMPC_DEPEND_source:
5253       case OMPC_DEPEND_sink:
5254       case OMPC_DEPEND_unknown:
5255         llvm_unreachable("Unknown task dependence type");
5256       }
5257       LValue FlagsLVal = CGF.EmitLValueForField(
5258           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5259       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5260                             FlagsLVal);
5261     }
5262     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5263         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5264   }
5265 
5266   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5267   // libcall.
5268   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5269   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5270   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5271   // list is not empty
5272   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5273   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5274   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5275   llvm::Value *DepTaskArgs[7];
5276   if (NumDependencies) {
5277     DepTaskArgs[0] = UpLoc;
5278     DepTaskArgs[1] = ThreadID;
5279     DepTaskArgs[2] = NewTask;
5280     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5281     DepTaskArgs[4] = DependenciesArray.getPointer();
5282     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5283     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5284   }
5285   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5286                         &TaskArgs,
5287                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5288     if (!Data.Tied) {
5289       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5290       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5291       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5292     }
5293     if (NumDependencies) {
5294       CGF.EmitRuntimeCall(
5295           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5296     } else {
5297       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5298                           TaskArgs);
5299     }
5300     // Check if parent region is untied and build return for untied task;
5301     if (auto *Region =
5302             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5303       Region->emitUntiedSwitch(CGF);
5304   };
5305 
5306   llvm::Value *DepWaitTaskArgs[6];
5307   if (NumDependencies) {
5308     DepWaitTaskArgs[0] = UpLoc;
5309     DepWaitTaskArgs[1] = ThreadID;
5310     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5311     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5312     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5313     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5314   }
5315   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5316                         NumDependencies, &DepWaitTaskArgs,
5317                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5318     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5319     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5320     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5321     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5322     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5323     // is specified.
5324     if (NumDependencies)
5325       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5326                           DepWaitTaskArgs);
5327     // Call proxy_task_entry(gtid, new_task);
5328     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5329                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5330       Action.Enter(CGF);
5331       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5332       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5333                                                           OutlinedFnArgs);
5334     };
5335 
5336     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5337     // kmp_task_t *new_task);
5338     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5339     // kmp_task_t *new_task);
5340     RegionCodeGenTy RCG(CodeGen);
5341     CommonActionTy Action(
5342         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5343         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5344     RCG.setAction(Action);
5345     RCG(CGF);
5346   };
5347 
5348   if (IfCond) {
5349     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5350   } else {
5351     RegionCodeGenTy ThenRCG(ThenCodeGen);
5352     ThenRCG(CGF);
5353   }
5354 }
5355 
/// Emits a call to the '__kmpc_taskloop' runtime entry point for a taskloop
/// directive. The task object is allocated and initialized by emitTaskInit;
/// this routine then fills in the loop bounds, stride and reductions fields of
/// the kmp_task_t object and emits the runtime call.
/// \param D Taskloop directive being emitted.
/// \param TaskFunction Outlined function that executes the task body.
/// \param SharedsTy Type of the record capturing the shared variables.
/// \param Shareds Address of the captured shareds record.
/// \param IfCond Condition expression of the 'if' clause, or null.
/// \param Data Clause data (schedule kind, grainsize/num_tasks, reductions).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Nothing to emit if the insertion point was cleared (unreachable code).
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate and initialize the task object; Result carries the task pointer,
  // the typed task record and its RecordDecl, and the optional dup function.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause value is passed to the runtime as an int; without a
  // clause the task is always scheduled eagerly (if_val = 1).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the 'lb' field of the task record from the directive's lower
  // bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the 'ub' field from the upper bound variable initializer.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the 'st' (stride) field from the stride variable initializer.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No task reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Runtime 'sched' argument encoding; the grainsize/num_tasks value selects
  // between Grainsize and NumTasks when a schedule clause is present.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup is used by the runtime to copy firstprivates per chunk; pass
      // null when no duplication function was generated.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5439 
5440 /// Emit reduction operation for each element of array (required for
5441 /// array sections) LHS op = RHS.
5442 /// \param Type Type of array.
5443 /// \param LHSVar Variable on the left side of the reduction operation
5444 /// (references element of array in original variable).
5445 /// \param RHSVar Variable on the right side of the reduction operation
5446 /// (references element of array in original variable).
5447 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5448 /// RHSVar.
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Generates a while-do style IR loop that walks both arrays in lockstep and
/// invokes \p RedOpGen on each element pair, with LHSVar/RHSVar privatized to
/// point at the current elements.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional atomic-update expressions forwarded
/// unchanged to \p RedOpGen (used by the atomic reduction path).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current element pointers; the back-edge incoming values
  // are added after the body has been emitted (below).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Privatize LHSVar/RHSVar so that RedOpGen's expressions resolve to the
  // current array elements rather than the whole arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Wire up the back edges of the PHIs from the (possibly extended) current
  // insert block, since RedOpGen may have created new blocks.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5519 
5520 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5521 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5522 /// UDR combiner function.
5523 static void emitReductionCombiner(CodeGenFunction &CGF,
5524                                   const Expr *ReductionOp) {
5525   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5526     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5527       if (const auto *DRE =
5528               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5529         if (const auto *DRD =
5530                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5531           std::pair<llvm::Function *, llvm::Function *> Reduction =
5532               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5533           RValue Func = RValue::get(Reduction.first);
5534           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5535           CGF.EmitIgnoredExpr(ReductionOp);
5536           return;
5537         }
5538   CGF.EmitIgnoredExpr(ReductionOp);
5539 }
5540 
/// Emits the outlined 'reduction_func' used by __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* holding the addresses of the
/// reduction variables. The function applies each reduction operation
/// element-wise: *(Type<i>*)lhs[i] = RedOp<i>(*lhs[i], *rhs[i]).
/// \param ArgsType Pointer type of the void* array the arguments are cast to.
/// \param Privates Private copies of the reduction variables (used for type
/// info, including VLA sizes).
/// \param LHSExprs, RHSExprs References to the LHS/RHS reduction variables.
/// \param ReductionOps Combiner expressions, one per reduction variable.
/// \return The newly created internal-linkage reduction function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Privatize each LHS/RHS variable to the address stored in the
  // corresponding slot of the void* arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLAs occupy an extra slot in the array holding the (ptr-encoded)
      // element count; bind it to the VLA's opaque size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner with the privatized variables in scope.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5632 
5633 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5634                                                   const Expr *ReductionOp,
5635                                                   const Expr *PrivateRef,
5636                                                   const DeclRefExpr *LHS,
5637                                                   const DeclRefExpr *RHS) {
5638   if (PrivateRef->getType()->isArrayType()) {
5639     // Emit reduction for array section.
5640     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5641     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5642     EmitOMPAggregateReduction(
5643         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5644         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5645           emitReductionCombiner(CGF, ReductionOp);
5646         });
5647   } else {
5648     // Emit reduction for array subscript or single variable.
5649     emitReductionCombiner(CGF, ReductionOp);
5650   }
5651 }
5652 
/// Emits the code for an OpenMP 'reduction' clause: either a simple inline
/// combination (SimpleReduction), or the full __kmpc_reduce{_nowait} protocol
/// with a switch over the runtime's result (case 1: combine + end_reduce,
/// case 2: atomic/critical combination).
/// \param Privates Private copies of the reduction variables.
/// \param LHSExprs, RHSExprs References to original and private variables.
/// \param ReductionOps Combiner expressions, one per reduction variable.
/// \param Options Selects nowait variant and the simple-reduction shortcut.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit if the insertion point was cleared (unreachable code).
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination required: just combine each item in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA element counts are smuggled through the void* array via
      // inttoptr; the reduction function converts them back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // The CommonActionTy wraps the combiner with the matching end_reduce call.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Recognize the 'x = <update>' shape so the combiner can be lowered to
      // a simple atomic update where possible.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path (cmpxchg loop): re-evaluate the update
                // expression against a temporary holding the loaded value.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5956 
5957 /// Generates unique name for artificial threadprivate variables.
5958 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5959 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5960                                       const Expr *Ref) {
5961   SmallString<256> Buffer;
5962   llvm::raw_svector_ostream Out(Buffer);
5963   const clang::DeclRefExpr *DE;
5964   const VarDecl *D = ::getBaseDecl(Ref, DE);
5965   if (!D)
5966     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5967   D = D->getCanonicalDecl();
5968   std::string Name = CGM.getOpenMPRuntime().getName(
5969       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5970   Out << Prefix << Name << "_"
5971       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5972   return Out.str();
5973 }
5974 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Single 'void *' parameter: the address of the private copy of the N-th
  // reduction item.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer to the private copy from the argument slot.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer - the shared lvalue is unused; pass a null
    // pointer lvalue as a placeholder.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6041 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// \param ReductionOp The combiner expression from the reduction clause.
/// \param LHS/RHS DeclRefExprs for the placeholder variables used inside
///        \a ReductionOp; they are remapped onto the function arguments.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Two 'void *' parameters: in/out accumulator and the incoming value.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6119 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
///
/// Returns nullptr if the reduction item needs no cleanups, so the caller can
/// store a null function pointer in the runtime descriptor.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single 'void *' parameter: the address of the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6168 
// Emits the kmp_task_red_input_t descriptor array for the task reduction
// items in \p Data and calls __kmpc_task_reduction_init, returning the
// taskgroup reduction handle produced by the runtime (nullptr if there is no
// insert point or no reduction variables).
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one descriptor element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // The finalizer is optional; store null when no cleanups are needed.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy (delayed) creation from the runtime.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6274 
// Stores into threadprivate globals the values that cannot be passed to the
// outlined init/comb/fini functions directly: the dynamic size of the N-th
// reduction item and, when a custom initializer is used, the address of the
// original (shared) reduction item.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6301 
// Returns the address of the thread-specific copy of a task reduction item,
// obtained from the runtime via __kmpc_task_reduction_get_th_data. The result
// keeps the alignment of the original shared lvalue.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}
6319 
6320 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6321                                        SourceLocation Loc) {
6322   if (!CGF.HaveInsertPoint())
6323     return;
6324   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6325   // global_tid);
6326   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6327   // Ignore return result until untied tasks are supported.
6328   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6329   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6330     Region->emitUntiedSwitch(CGF);
6331 }
6332 
6333 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6334                                            OpenMPDirectiveKind InnerKind,
6335                                            const RegionCodeGenTy &CodeGen,
6336                                            bool HasCancel) {
6337   if (!CGF.HaveInsertPoint())
6338     return;
6339   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6340   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6341 }
6342 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel/__kmpc_cancellationpoint runtime calls; the numeric values
/// are part of the runtime interface.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel 'parallel' region
  CancelLoop = 2,      // cancel worksharing loop ('for') region
  CancelSections = 3,  // cancel 'sections' region
  CancelTaskgroup = 4  // cancel 'taskgroup' region
};
} // anonymous namespace
6352 
6353 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6354   RTCancelKind CancelKind = CancelNoreq;
6355   if (CancelRegion == OMPD_parallel)
6356     CancelKind = CancelParallel;
6357   else if (CancelRegion == OMPD_for)
6358     CancelKind = CancelLoop;
6359   else if (CancelRegion == OMPD_sections)
6360     CancelKind = CancelSections;
6361   else {
6362     assert(CancelRegion == OMPD_taskgroup);
6363     CancelKind = CancelTaskgroup;
6364   }
6365   return CancelKind;
6366 }
6367 
// Emits code for '#pragma omp cancellation point': calls
// __kmpc_cancellationpoint and, if it returns non-zero, branches out of the
// construct through the cleanup path.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6402 
// Emits code for '#pragma omp cancel': calls __kmpc_cancel (optionally
// guarded by the 'if' clause condition) and, if it returns non-zero,
// branches out of the construct through the cleanup path.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Captures by value/pointer only: the lambda may run from emitIfClause.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: only cancel when the condition is true; the
      // else-branch generator is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6444 
// Emits the outlined function for a target region. Records that this module
// has emitted at least one target region before delegating the actual
// outlining to emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6454 
// Outlines the captured statement of a target directive into a function with
// a location-derived unique name, creates the target-region ID and, for
// offload entries, registers the entry with the offload-entries manager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a distinct weak constant byte, unique per entry.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6521 
6522 /// Checks if the expression is constant or does not have non-trivial function
6523 /// calls.
6524 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6525   // We can skip constant expressions.
6526   // We can skip expressions with trivial calls or simple expressions.
6527   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6528           !E->hasNonTrivialCall(Ctx)) &&
6529          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6530 }
6531 
// Walks compound statements (ignoring containers) and returns the single
// "significant" child statement of \p Body, or nullptr if there is more than
// one. Trivial expressions, no-op statements and harmless declarations are
// skipped during the search.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Skip constant expressions and expressions with trivial calls only.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only if every declaration in it is.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or when their type is
              // trivial/reference and the initializer (if any) is trivial.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single significant child, stripping containers again.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6576 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// The returned value (when non-null) is an i32; a value of 0 means the
/// runtime is free to choose the number of teams.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' region: the clauses of interest, if any, live on the
    // (at most one) directive nested directly inside the captured body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // The num_teams expression may reference variables captured by the
          // 'target' region; set up the capture info so it is emitted
          // correctly in the host context.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams construct without a num_teams clause: let the runtime
        // choose.
        return Bld.getInt32(0);
      }
      // A nested 'parallel' or 'simd' region executes within a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified; the number of teams
    // cannot be computed here.
    return nullptr;
  }
  // Combined 'target teams' constructs: the num_teams clause, if present, is
  // attached to the directive itself.
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    // No num_teams clause: let the runtime choose.
    return Bld.getInt32(0);
  }
  // Combined constructs without a teams component execute with one team.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining directives are not target execution directives and are
  // excluded by the assertion at the top of this function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6703 
/// Compute the number of threads implied by the single directive nested
/// inside the captured statement \p CS, combining the 'if' and 'num_threads'
/// clauses of a nested parallel directive with \p DefaultThreadLimitVal.
///
/// Returns getInt32(1) for serialized regions (if-clause constant false, or a
/// nested simd directive), getInt32(0) when the runtime should choose, the
/// computed/clamped num_threads value otherwise — or \p DefaultThreadLimitVal
/// itself (which may be null) when no nested parallel/simd directive is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        // Clause expressions may reference variables captured by the region;
        // set up the capture info so they are emitted in the host context.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the if clause that applies to 'parallel' (unmodified clauses
        // apply to all directives of a combined construct).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Constant-false condition: the region is serialized, so exactly
            // one thread executes it.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on.
            // Variables marked OMPCaptureNoInitAttr get storage and cleanups
            // but no initializer.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init handling as for the if clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the enclosing thread limit (take the minimum,
        // comparing as unsigned).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread limit, or 0 to let
        // the runtime choose.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region runs with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6795 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// The returned value (when non-null) is an i32; a value of 0 means the
/// runtime is free to choose the number of threads.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the relevant clauses, if any, live on the directives
    // nested inside the captured body.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // The thread_limit expression may reference variables captured by the
        // 'target' region; set up the capture info accordingly.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the clause expression depends on.
        // Variables marked OMPCaptureNoInitAttr get storage and cleanups but
        // no initializer.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) directive, descend one more level
      // to inspect the directive nested inside it.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region runs with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // Combined 'target teams': the thread_limit clause, if any, is attached
    // to the directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  // Combined constructs with a parallel component: combine the if,
  // thread_limit and num_threads clauses found on the directive itself.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the if clause that applies to 'parallel' (unmodified clauses
      // apply to all directives of the combined construct).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: the region is serialized, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Use the minimum of num_threads and thread_limit (unsigned compare).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // No clause provided a value: 0 lets the runtime choose.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  // Simd-only target constructs run with a single thread.
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining directives are not target execution directives and are
  // excluded by the assertion at the top of this function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7014 
7015 namespace {
7016 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7017 
7018 // Utility to handle information from clauses associated with a given
7019 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7020 // It provides a convenient interface to obtain the information and generate
7021 // code for that information.
7022 class MappableExprsHandler {
7023 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.  These must stay in sync with the flag values expected by
  /// the offloading runtime library — NOTE(review): confirm against the
  /// libomptarget definitions before changing any value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.  Use getFlagMemberOffset() to obtain the shift amount of
    /// this bit-field.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7064 
7065   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7066   static unsigned getFlagMemberOffset() {
7067     unsigned Offset = 0;
7068     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7069          Remain = Remain >> 1)
7070       Offset++;
7071     return Offset;
7072   }
7073 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Access the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the declaration associated with this device pointer (may be
    /// null).
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associate declaration \p D with this device pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7090 
7091   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7092   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7093   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7094 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
  };
7106 
7107 private:
  /// Information extracted from a map-like clause for one mappable-expression
  /// component list: the components themselves, the map type and its
  /// modifiers, whether a device pointer must be returned for the entry, and
  /// whether the mapping was implicit.
  struct MapInfo {
    /// The expression components this mapping applies to.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    /// True if this mapping was generated implicitly rather than written by
    /// the user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7125 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression the deferred entry was created for.
    const Expr *IE = nullptr;
    /// The declaration of the use_device_ptr pointer.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7136 
7137   /// The target directive from where the mappable clauses were extracted. It
7138   /// is either a executable directive or a user-defined mapper directive.
7139   llvm::PointerUnion<const OMPExecutableDirective *,
7140                      const OMPDeclareMapperDecl *>
7141       CurDir;
7142 
7143   /// Function the directive is being generated for.
7144   CodeGenFunction &CGF;
7145 
7146   /// Set of all first private variables in the current directive.
7147   /// bool data is set to true if the variable is implicitly marked as
7148   /// firstprivate, false otherwise.
7149   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7150 
7151   /// Map between device pointer declarations and their expression components.
7152   /// The key value for declarations in 'this' is null.
7153   llvm::DenseMap<
7154       const ValueDecl *,
7155       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7156       DevPointersMap;
7157 
  /// Compute the size in bytes of the object designated by \p E as an
  /// llvm::Value.  For an OMPArraySectionExpr the size is derived from the
  /// section bounds (lower bound, length) instead of the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee type for pointer bases and from
      // the element type for array bases.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // If the lower-bound offset exceeds the total size, clamp the result
      // to 0 rather than producing a wrapped-around (huge unsigned) size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7218 
7219   /// Return the corresponding bits for a given map clause modifier. Add
7220   /// a flag marking the map as a pointer if requested. Add a flag marking the
7221   /// map as the first one of a series of maps that relate to the same map
7222   /// expression.
7223   OpenMPOffloadMappingFlags getMapTypeBits(
7224       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7226     OpenMPOffloadMappingFlags Bits =
7227         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7228     switch (MapType) {
7229     case OMPC_MAP_alloc:
7230     case OMPC_MAP_release:
7231       // alloc and release is the default behavior in the runtime library,  i.e.
7232       // if we don't pass any bits alloc/release that is what the runtime is
7233       // going to do. Therefore, we don't need to signal anything for these two
7234       // type modifiers.
7235       break;
7236     case OMPC_MAP_to:
7237       Bits |= OMP_MAP_TO;
7238       break;
7239     case OMPC_MAP_from:
7240       Bits |= OMP_MAP_FROM;
7241       break;
7242     case OMPC_MAP_tofrom:
7243       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7244       break;
7245     case OMPC_MAP_delete:
7246       Bits |= OMP_MAP_DELETE;
7247       break;
7248     case OMPC_MAP_unknown:
7249       llvm_unreachable("Unexpected map type!");
7250     }
7251     if (AddPtrFlag)
7252       Bits |= OMP_MAP_PTR_AND_OBJ;
7253     if (AddIsTargetParamFlag)
7254       Bits |= OMP_MAP_TARGET_PARAM;
7255     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7256         != MapModifiers.end())
7257       Bits |= OMP_MAP_ALWAYS;
7258     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7259         != MapModifiers.end())
7260       Bits |= OMP_MAP_CLOSE;
7261     return Bits;
7262   }
7263 
7264   /// Return true if the provided expression is a final array section. A
7265   /// final array section, is one whose length can't be proved to be one.
7266   bool isFinalArraySectionExpression(const Expr *E) const {
7267     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7268 
7269     // It is not an array section and therefore not a unity-size one.
7270     if (!OASE)
7271       return false;
7272 
7273     // An array section with no colon always refer to a single element.
7274     if (OASE->getColonLoc().isInvalid())
7275       return false;
7276 
7277     const Expr *Length = OASE->getLength();
7278 
7279     // If we don't have a length we have to check if the array has size 1
7280     // for this dimension. Also, we should always expect a length if the
7281     // base type is pointer.
7282     if (!Length) {
7283       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7284                              OASE->getBase()->IgnoreParenImpCasts())
7285                              .getCanonicalType();
7286       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7287         return ATy->getSize().getSExtValue() != 1;
7288       // If we don't have a constant dimension length, we have to consider
7289       // the current section as having any size, so it is not necessarily
7290       // unitary. If it happen to be unity size, that's user fault.
7291       return true;
7292     }
7293 
7294     // Check if the length evaluates to 1.
7295     Expr::EvalResult Result;
7296     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7297       return true; // Can have more that size 1.
7298 
7299     llvm::APSInt ConstLength = Result.Val.getInt();
7300     return ConstLength.getSExtValue() != 1;
7301   }
7302 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    // Base address of the entry currently being generated; updated as we
    // walk through pointer dereferences in the component list.
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the begin address of the storage described by the current
        // component (the section/element actually being mapped).
        Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                         .getAddress(CGF);

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        // Byte size of the storage described by the current component.
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          // NOTE(review): the dyn_cast result is dereferenced below without a
          // null check -- this assumes the member declaration here is always a
          // FieldDecl; confirm for indirect fields and other member kinds.
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7739 
7740   /// Return the adjusted map modifiers if the declaration a capture refers to
7741   /// appears in a first-private clause. This is expected to be used only with
7742   /// directives that start with 'target'.
7743   MappableExprsHandler::OpenMPOffloadMappingFlags
7744   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7745     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7746 
7747     // A first private variable captured by reference will use only the
7748     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7749     // declaration is known as first-private in this handler.
7750     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7751       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7752           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7753         return MappableExprsHandler::OMP_MAP_ALWAYS |
7754                MappableExprsHandler::OMP_MAP_TO;
7755       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7756         return MappableExprsHandler::OMP_MAP_TO |
7757                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7758       return MappableExprsHandler::OMP_MAP_PRIVATE |
7759              MappableExprsHandler::OMP_MAP_TO;
7760     }
7761     return MappableExprsHandler::OMP_MAP_TO |
7762            MappableExprsHandler::OMP_MAP_FROM;
7763   }
7764 
  /// Build the MEMBER_OF flag for the 1-based \p Position of the parent
  /// struct entry in the offload argument list (Position 0 encodes
  /// MEMBER_OF(1)).
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the 1-based position left by getFlagMemberOffset() bits so it
    // occupies the MEMBER_OF field of the mapping flags.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
7770 
7771   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7772                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7773     // If the entry is PTR_AND_OBJ but has not been marked with the special
7774     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7775     // marked as MEMBER_OF.
7776     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7777         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7778       return;
7779 
7780     // Reset the placeholder value to prepare the flag for the assignment of the
7781     // proper MEMBER_OF value.
7782     Flags &= ~OMP_MAP_MEMBER_OF;
7783     Flags |= MemberOfFlag;
7784   }
7785 
7786   void getPlainLayout(const CXXRecordDecl *RD,
7787                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7788                       bool AsBase) const {
7789     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7790 
7791     llvm::StructType *St =
7792         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7793 
7794     unsigned NumElements = St->getNumElements();
7795     llvm::SmallVector<
7796         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7797         RecordLayout(NumElements);
7798 
7799     // Fill bases.
7800     for (const auto &I : RD->bases()) {
7801       if (I.isVirtual())
7802         continue;
7803       const auto *Base = I.getType()->getAsCXXRecordDecl();
7804       // Ignore empty bases.
7805       if (Base->isEmpty() || CGF.getContext()
7806                                  .getASTRecordLayout(Base)
7807                                  .getNonVirtualSize()
7808                                  .isZero())
7809         continue;
7810 
7811       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7812       RecordLayout[FieldIndex] = Base;
7813     }
7814     // Fill in virtual bases.
7815     for (const auto &I : RD->vbases()) {
7816       const auto *Base = I.getType()->getAsCXXRecordDecl();
7817       // Ignore empty bases.
7818       if (Base->isEmpty())
7819         continue;
7820       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7821       if (RecordLayout[FieldIndex])
7822         continue;
7823       RecordLayout[FieldIndex] = Base;
7824     }
7825     // Fill in all the fields.
7826     assert(!RD->isUnion() && "Unexpected union.");
7827     for (const auto *Field : RD->fields()) {
7828       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7829       // will fill in later.)
7830       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7831         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7832         RecordLayout[FieldIndex] = Field;
7833       }
7834     }
7835     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7836              &Data : RecordLayout) {
7837       if (Data.isNull())
7838         continue;
7839       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7840         getPlainLayout(Base, Layout, /*AsBase=*/true);
7841       else
7842         Layout.push_back(Data.get<const FieldDecl *>());
7843     }
7844   }
7845 
7846 public:
7847   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7848       : CurDir(&Dir), CGF(CGF) {
7849     // Extract firstprivate clause information.
7850     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7851       for (const auto *D : C->varlists())
7852         FirstPrivateDecls.try_emplace(
7853             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7854     // Extract device pointer clause information.
7855     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7856       for (auto L : C->component_lists())
7857         DevPointersMap[L.first].push_back(L.second);
7858   }
7859 
  /// Constructor for the declare mapper directive. Only the directive and the
  /// CodeGenFunction are recorded; no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7863 
7864   /// Generate code for the combined entry if we have a partially mapped struct
7865   /// and take care of the mapping flags of the arguments corresponding to
7866   /// individual struct members.
7867   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7868                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7869                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7870                          const StructRangeInfoTy &PartialStruct) const {
7871     // Base is the base of the struct
7872     BasePointers.push_back(PartialStruct.Base.getPointer());
7873     // Pointer is the address of the lowest element
7874     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7875     Pointers.push_back(LB);
7876     // Size is (addr of {highest+1} element) - (addr of lowest element)
7877     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7878     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7879     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7880     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7881     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7882     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7883                                                   /*isSigned=*/false);
7884     Sizes.push_back(Size);
7885     // Map type is always TARGET_PARAM
7886     Types.push_back(OMP_MAP_TARGET_PARAM);
7887     // Remove TARGET_PARAM flag from the first element
7888     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7889 
7890     // All other current entries will be MEMBER_OF the combined entry
7891     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7892     // 0xFFFF in the MEMBER_OF field).
7893     OpenMPOffloadMappingFlags MemberOfFlag =
7894         getMemberOfFlag(BasePointers.size() - 1);
7895     for (auto &M : CurTypes)
7896       setCorrectMemberOfFlag(M, MemberOfFlag);
7897   }
7898 
7899   /// Generate all the base pointers, section pointers, sizes and map
7900   /// types for the extracted mappable expressions. Also, for each item that
7901   /// relates with a device pointer, a pair of the relevant declaration and
7902   /// index where it occurs is appended to the device pointers info array.
7903   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7904                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7905                        MapFlagsArrayTy &Types) const {
7906     // We have to process the component lists that relate with the same
7907     // declaration in a single chunk so that we can generate the map flags
7908     // correctly. Therefore, we organize all lists in a map.
7909     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7910 
7911     // Helper function to fill the information map for the different supported
7912     // clauses.
7913     auto &&InfoGen = [&Info](
7914         const ValueDecl *D,
7915         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7916         OpenMPMapClauseKind MapType,
7917         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7918         bool ReturnDevicePointer, bool IsImplicit) {
7919       const ValueDecl *VD =
7920           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7921       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7922                             IsImplicit);
7923     };
7924 
7925     assert(CurDir.is<const OMPExecutableDirective *>() &&
7926            "Expect a executable directive");
7927     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7928     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7929       for (const auto L : C->component_lists()) {
7930         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7931             /*ReturnDevicePointer=*/false, C->isImplicit());
7932       }
7933     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7934       for (const auto L : C->component_lists()) {
7935         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7936             /*ReturnDevicePointer=*/false, C->isImplicit());
7937       }
7938     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7939       for (const auto L : C->component_lists()) {
7940         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7941             /*ReturnDevicePointer=*/false, C->isImplicit());
7942       }
7943 
7944     // Look at the use_device_ptr clause information and mark the existing map
7945     // entries as such. If there is no map information for an entry in the
7946     // use_device_ptr list, we create one with map type 'alloc' and zero size
7947     // section. It is the user fault if that was not mapped before. If there is
7948     // no map information and the pointer is a struct member, then we defer the
7949     // emission of that entry until the whole struct has been processed.
7950     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7951         DeferredInfo;
7952 
7953     for (const auto *C :
7954          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7955       for (const auto L : C->component_lists()) {
7956         assert(!L.second.empty() && "Not expecting empty list of components!");
7957         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7958         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7959         const Expr *IE = L.second.back().getAssociatedExpression();
7960         // If the first component is a member expression, we have to look into
7961         // 'this', which maps to null in the map of map information. Otherwise
7962         // look directly for the information.
7963         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7964 
7965         // We potentially have map information for this declaration already.
7966         // Look for the first set of components that refer to it.
7967         if (It != Info.end()) {
7968           auto CI = std::find_if(
7969               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7970                 return MI.Components.back().getAssociatedDeclaration() == VD;
7971               });
7972           // If we found a map entry, signal that the pointer has to be returned
7973           // and move on to the next declaration.
7974           if (CI != It->second.end()) {
7975             CI->ReturnDevicePointer = true;
7976             continue;
7977           }
7978         }
7979 
7980         // We didn't find any match in our map information - generate a zero
7981         // size array section - if the pointer is a struct member we defer this
7982         // action until the whole struct has been processed.
7983         if (isa<MemberExpr>(IE)) {
7984           // Insert the pointer into Info to be processed by
7985           // generateInfoForComponentList. Because it is a member pointer
7986           // without a pointee, no entry will be generated for it, therefore
7987           // we need to generate one after the whole struct has been processed.
7988           // Nonetheless, generateInfoForComponentList must be called to take
7989           // the pointer into account for the calculation of the range of the
7990           // partial struct.
7991           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7992                   /*ReturnDevicePointer=*/false, C->isImplicit());
7993           DeferredInfo[nullptr].emplace_back(IE, VD);
7994         } else {
7995           llvm::Value *Ptr =
7996               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7997           BasePointers.emplace_back(Ptr, VD);
7998           Pointers.push_back(Ptr);
7999           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8000           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8001         }
8002       }
8003     }
8004 
8005     for (const auto &M : Info) {
8006       // We need to know when we generate information for the first component
8007       // associated with a capture, because the mapping flags depend on it.
8008       bool IsFirstComponentList = true;
8009 
8010       // Temporary versions of arrays
8011       MapBaseValuesArrayTy CurBasePointers;
8012       MapValuesArrayTy CurPointers;
8013       MapValuesArrayTy CurSizes;
8014       MapFlagsArrayTy CurTypes;
8015       StructRangeInfoTy PartialStruct;
8016 
8017       for (const MapInfo &L : M.second) {
8018         assert(!L.Components.empty() &&
8019                "Not expecting declaration with no component lists.");
8020 
8021         // Remember the current base pointer index.
8022         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8023         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8024                                      CurBasePointers, CurPointers, CurSizes,
8025                                      CurTypes, PartialStruct,
8026                                      IsFirstComponentList, L.IsImplicit);
8027 
8028         // If this entry relates with a device pointer, set the relevant
8029         // declaration and add the 'return pointer' flag.
8030         if (L.ReturnDevicePointer) {
8031           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8032                  "Unexpected number of mapped base pointers.");
8033 
8034           const ValueDecl *RelevantVD =
8035               L.Components.back().getAssociatedDeclaration();
8036           assert(RelevantVD &&
8037                  "No relevant declaration related with device pointer??");
8038 
8039           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8040           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8041         }
8042         IsFirstComponentList = false;
8043       }
8044 
8045       // Append any pending zero-length pointers which are struct members and
8046       // used with use_device_ptr.
8047       auto CI = DeferredInfo.find(M.first);
8048       if (CI != DeferredInfo.end()) {
8049         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8050           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8051           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8052               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8053           CurBasePointers.emplace_back(BasePtr, L.VD);
8054           CurPointers.push_back(Ptr);
8055           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8056           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8057           // value MEMBER_OF=FFFF so that the entry is later updated with the
8058           // correct value of MEMBER_OF.
8059           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8060                              OMP_MAP_MEMBER_OF);
8061         }
8062       }
8063 
8064       // If there is an entry in PartialStruct it means we have a struct with
8065       // individual members mapped. Emit an extra combined entry.
8066       if (PartialStruct.Base.isValid())
8067         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8068                           PartialStruct);
8069 
8070       // We need to append the results of this capture to what we already have.
8071       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8072       Pointers.append(CurPointers.begin(), CurPointers.end());
8073       Sizes.append(CurSizes.begin(), CurSizes.end());
8074       Types.append(CurTypes.begin(), CurTypes.end());
8075     }
8076   }
8077 
8078   /// Generate all the base pointers, section pointers, sizes and map types for
8079   /// the extracted map clauses of user-defined mapper.
8080   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8081                                 MapValuesArrayTy &Pointers,
8082                                 MapValuesArrayTy &Sizes,
8083                                 MapFlagsArrayTy &Types) const {
8084     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8085            "Expect a declare mapper directive");
8086     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8087     // We have to process the component lists that relate with the same
8088     // declaration in a single chunk so that we can generate the map flags
8089     // correctly. Therefore, we organize all lists in a map.
8090     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8091 
8092     // Helper function to fill the information map for the different supported
8093     // clauses.
8094     auto &&InfoGen = [&Info](
8095         const ValueDecl *D,
8096         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8097         OpenMPMapClauseKind MapType,
8098         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8099         bool ReturnDevicePointer, bool IsImplicit) {
8100       const ValueDecl *VD =
8101           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8102       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8103                             IsImplicit);
8104     };
8105 
8106     for (const auto *C : CurMapperDir->clauselists()) {
8107       const auto *MC = cast<OMPMapClause>(C);
8108       for (const auto L : MC->component_lists()) {
8109         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8110                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8111       }
8112     }
8113 
8114     for (const auto &M : Info) {
8115       // We need to know when we generate information for the first component
8116       // associated with a capture, because the mapping flags depend on it.
8117       bool IsFirstComponentList = true;
8118 
8119       // Temporary versions of arrays
8120       MapBaseValuesArrayTy CurBasePointers;
8121       MapValuesArrayTy CurPointers;
8122       MapValuesArrayTy CurSizes;
8123       MapFlagsArrayTy CurTypes;
8124       StructRangeInfoTy PartialStruct;
8125 
8126       for (const MapInfo &L : M.second) {
8127         assert(!L.Components.empty() &&
8128                "Not expecting declaration with no component lists.");
8129         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8130                                      CurBasePointers, CurPointers, CurSizes,
8131                                      CurTypes, PartialStruct,
8132                                      IsFirstComponentList, L.IsImplicit);
8133         IsFirstComponentList = false;
8134       }
8135 
8136       // If there is an entry in PartialStruct it means we have a struct with
8137       // individual members mapped. Emit an extra combined entry.
8138       if (PartialStruct.Base.isValid())
8139         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8140                           PartialStruct);
8141 
8142       // We need to append the results of this capture to what we already have.
8143       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8144       Pointers.append(CurPointers.begin(), CurPointers.end());
8145       Sizes.append(CurSizes.begin(), CurSizes.end());
8146       Types.append(CurTypes.begin(), CurTypes.end());
8147     }
8148   }
8149 
8150   /// Emit capture info for lambdas for variables captured by reference.
8151   void generateInfoForLambdaCaptures(
8152       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8153       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8154       MapFlagsArrayTy &Types,
8155       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8156     const auto *RD = VD->getType()
8157                          .getCanonicalType()
8158                          .getNonReferenceType()
8159                          ->getAsCXXRecordDecl();
8160     if (!RD || !RD->isLambda())
8161       return;
8162     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8163     LValue VDLVal = CGF.MakeAddrLValue(
8164         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8165     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8166     FieldDecl *ThisCapture = nullptr;
8167     RD->getCaptureFields(Captures, ThisCapture);
8168     if (ThisCapture) {
8169       LValue ThisLVal =
8170           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8171       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8172       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8173                                  VDLVal.getPointer(CGF));
8174       BasePointers.push_back(ThisLVal.getPointer(CGF));
8175       Pointers.push_back(ThisLValVal.getPointer(CGF));
8176       Sizes.push_back(
8177           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8178                                     CGF.Int64Ty, /*isSigned=*/true));
8179       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8180                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8181     }
8182     for (const LambdaCapture &LC : RD->captures()) {
8183       if (!LC.capturesVariable())
8184         continue;
8185       const VarDecl *VD = LC.getCapturedVar();
8186       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8187         continue;
8188       auto It = Captures.find(VD);
8189       assert(It != Captures.end() && "Found lambda capture without field.");
8190       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8191       if (LC.getCaptureKind() == LCK_ByRef) {
8192         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8193         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8194                                    VDLVal.getPointer(CGF));
8195         BasePointers.push_back(VarLVal.getPointer(CGF));
8196         Pointers.push_back(VarLValVal.getPointer(CGF));
8197         Sizes.push_back(CGF.Builder.CreateIntCast(
8198             CGF.getTypeSize(
8199                 VD->getType().getCanonicalType().getNonReferenceType()),
8200             CGF.Int64Ty, /*isSigned=*/true));
8201       } else {
8202         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8203         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8204                                    VDLVal.getPointer(CGF));
8205         BasePointers.push_back(VarLVal.getPointer(CGF));
8206         Pointers.push_back(VarRVal.getScalarVal());
8207         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8208       }
8209       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8210                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8211     }
8212   }
8213 
8214   /// Set correct indices for lambdas captures.
8215   void adjustMemberOfForLambdaCaptures(
8216       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8217       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8218       MapFlagsArrayTy &Types) const {
8219     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8220       // Set correct member_of idx for all implicit lambda captures.
8221       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8222                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8223         continue;
8224       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8225       assert(BasePtr && "Unable to find base lambda address.");
8226       int TgtIdx = -1;
8227       for (unsigned J = I; J > 0; --J) {
8228         unsigned Idx = J - 1;
8229         if (Pointers[Idx] != BasePtr)
8230           continue;
8231         TgtIdx = Idx;
8232         break;
8233       }
8234       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8235       // All other current entries will be MEMBER_OF the combined entry
8236       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8237       // 0xFFFF in the MEMBER_OF field).
8238       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8239       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8240     }
8241   }
8242 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A 'this' capture is keyed under a null declaration in the clause
    // component lists; everything else uses the canonical captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every map-clause component list that refers to this capture,
    // together with its map type, modifiers and implicit flag.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Two component lists overlap when one is a prefix of the other, comparing
    // from the base outwards; the prefix list becomes the "base" entry and the
    // longer list is recorded as an overlapped sub-component of it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Only compare against the lists after L; earlier pairs were already
      // examined in previous outer iterations.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE: this tie reuses MapType/MapModifiers/IsImplicit from the outer
        // iteration as scratch; only Components1 is consumed below.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists from the base (reverse order) while they agree on
        // both the expression kind and the associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. Sorting needs the record's
    // field layout (flattened through bases when the type is a C++ class).
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the first differing field in
            // the record layout (field index within the same parent record,
            // plain layout order across different parents).
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // First emit the entries that have overlapped elements, passing the
    // overlap information along so component-list emission can split the base
    // entry around the overlapped regions.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements. The first
    // component list overall is only the first one here if nothing was emitted
    // in the overlapped pass above.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8420 
8421   /// Generate the base pointers, section pointers, sizes and map types
8422   /// associated with the declare target link variables.
8423   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8424                                         MapValuesArrayTy &Pointers,
8425                                         MapValuesArrayTy &Sizes,
8426                                         MapFlagsArrayTy &Types) const {
8427     assert(CurDir.is<const OMPExecutableDirective *>() &&
8428            "Expect a executable directive");
8429     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8430     // Map other list items in the map clause which are not captured variables
8431     // but "declare target link" global variables.
8432     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8433       for (const auto L : C->component_lists()) {
8434         if (!L.first)
8435           continue;
8436         const auto *VD = dyn_cast<VarDecl>(L.first);
8437         if (!VD)
8438           continue;
8439         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8440             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8441         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8442             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8443           continue;
8444         StructRangeInfoTy PartialStruct;
8445         generateInfoForComponentList(
8446             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8447             Pointers, Sizes, Types, PartialStruct,
8448             /*IsFirstComponentList=*/true, C->isImplicit());
8449         assert(!PartialStruct.Base.isValid() &&
8450                "No partial structs for declare target link expected.");
8451       }
8452     }
8453   }
8454 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Captures default to implicit maps; the flag is overridden below from
    // FirstPrivateDecls when an entry for the variable exists there.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is a pointer; map the whole pointee object with the size of the
      // pointee type.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Variable captured by reference.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate variable is materialized once as a global
        // copy and mapped from there instead of the original storage.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        // Note: CurSizes.back() is the element size pushed just above.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: load the pointer value
          // itself so it, rather than the reference, is mapped.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8538 };
8539 } // anonymous namespace
8540 
8541 /// Emit the arrays used to pass the captures and map information to the
8542 /// offloading runtime library. If there is no map or capture information,
8543 /// return nullptr by reference.
8544 static void
8545 emitOffloadingArrays(CodeGenFunction &CGF,
8546                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8547                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8548                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8549                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8550                      CGOpenMPRuntime::TargetDataInfo &Info) {
8551   CodeGenModule &CGM = CGF.CGM;
8552   ASTContext &Ctx = CGF.getContext();
8553 
8554   // Reset the array information.
8555   Info.clearArrayInfo();
8556   Info.NumberOfPtrs = BasePointers.size();
8557 
8558   if (Info.NumberOfPtrs) {
8559     // Detect if we have any capture size requiring runtime evaluation of the
8560     // size so that a constant array could be eventually used.
8561     bool hasRuntimeEvaluationCaptureSize = false;
8562     for (llvm::Value *S : Sizes)
8563       if (!isa<llvm::Constant>(S)) {
8564         hasRuntimeEvaluationCaptureSize = true;
8565         break;
8566       }
8567 
8568     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8569     QualType PointerArrayType = Ctx.getConstantArrayType(
8570         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8571         /*IndexTypeQuals=*/0);
8572 
8573     Info.BasePointersArray =
8574         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8575     Info.PointersArray =
8576         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8577 
8578     // If we don't have any VLA types or other types that require runtime
8579     // evaluation, we can use a constant array for the map sizes, otherwise we
8580     // need to fill up the arrays as we do for the pointers.
8581     QualType Int64Ty =
8582         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8583     if (hasRuntimeEvaluationCaptureSize) {
8584       QualType SizeArrayType = Ctx.getConstantArrayType(
8585           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8586           /*IndexTypeQuals=*/0);
8587       Info.SizesArray =
8588           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8589     } else {
8590       // We expect all the sizes to be constant, so we collect them to create
8591       // a constant array.
8592       SmallVector<llvm::Constant *, 16> ConstSizes;
8593       for (llvm::Value *S : Sizes)
8594         ConstSizes.push_back(cast<llvm::Constant>(S));
8595 
8596       auto *SizesArrayInit = llvm::ConstantArray::get(
8597           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8598       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8599       auto *SizesArrayGbl = new llvm::GlobalVariable(
8600           CGM.getModule(), SizesArrayInit->getType(),
8601           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8602           SizesArrayInit, Name);
8603       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8604       Info.SizesArray = SizesArrayGbl;
8605     }
8606 
8607     // The map types are always constant so we don't need to generate code to
8608     // fill arrays. Instead, we create an array constant.
8609     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8610     llvm::copy(MapTypes, Mapping.begin());
8611     llvm::Constant *MapTypesArrayInit =
8612         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8613     std::string MaptypesName =
8614         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8615     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8616         CGM.getModule(), MapTypesArrayInit->getType(),
8617         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8618         MapTypesArrayInit, MaptypesName);
8619     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8620     Info.MapTypesArray = MapTypesArrayGbl;
8621 
8622     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8623       llvm::Value *BPVal = *BasePointers[I];
8624       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8625           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8626           Info.BasePointersArray, 0, I);
8627       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8628           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8629       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8630       CGF.Builder.CreateStore(BPVal, BPAddr);
8631 
8632       if (Info.requiresDevicePointerInfo())
8633         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8634           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8635 
8636       llvm::Value *PVal = Pointers[I];
8637       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8638           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8639           Info.PointersArray, 0, I);
8640       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8641           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8642       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8643       CGF.Builder.CreateStore(PVal, PAddr);
8644 
8645       if (hasRuntimeEvaluationCaptureSize) {
8646         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8647             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8648             Info.SizesArray,
8649             /*Idx0=*/0,
8650             /*Idx1=*/I);
8651         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8652         CGF.Builder.CreateStore(
8653             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8654             SAddr);
8655       }
8656     }
8657   }
8658 }
8659 
8660 /// Emit the arguments to be passed to the runtime library based on the
8661 /// arrays of pointers, sizes and map types.
8662 static void emitOffloadingArraysArgument(
8663     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8664     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8665     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8666   CodeGenModule &CGM = CGF.CGM;
8667   if (Info.NumberOfPtrs) {
8668     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8669         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8670         Info.BasePointersArray,
8671         /*Idx0=*/0, /*Idx1=*/0);
8672     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8673         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8674         Info.PointersArray,
8675         /*Idx0=*/0,
8676         /*Idx1=*/0);
8677     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8678         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8679         /*Idx0=*/0, /*Idx1=*/0);
8680     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8681         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8682         Info.MapTypesArray,
8683         /*Idx0=*/0,
8684         /*Idx1=*/0);
8685   } else {
8686     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8687     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8688     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8689     MapTypesArrayArg =
8690         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8691   }
8692 }
8693 
8694 /// Check for inner distribute directive.
8695 static const OMPExecutableDirective *
8696 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8697   const auto *CS = D.getInnermostCapturedStmt();
8698   const auto *Body =
8699       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8700   const Stmt *ChildStmt =
8701       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8702 
8703   if (const auto *NestedDir =
8704           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8705     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8706     switch (D.getDirectiveKind()) {
8707     case OMPD_target:
8708       if (isOpenMPDistributeDirective(DKind))
8709         return NestedDir;
8710       if (DKind == OMPD_teams) {
8711         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8712             /*IgnoreCaptured=*/true);
8713         if (!Body)
8714           return nullptr;
8715         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8716         if (const auto *NND =
8717                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8718           DKind = NND->getDirectiveKind();
8719           if (isOpenMPDistributeDirective(DKind))
8720             return NND;
8721         }
8722       }
8723       return nullptr;
8724     case OMPD_target_teams:
8725       if (isOpenMPDistributeDirective(DKind))
8726         return NestedDir;
8727       return nullptr;
8728     case OMPD_target_parallel:
8729     case OMPD_target_simd:
8730     case OMPD_target_parallel_for:
8731     case OMPD_target_parallel_for_simd:
8732       return nullptr;
8733     case OMPD_target_teams_distribute:
8734     case OMPD_target_teams_distribute_simd:
8735     case OMPD_target_teams_distribute_parallel_for:
8736     case OMPD_target_teams_distribute_parallel_for_simd:
8737     case OMPD_parallel:
8738     case OMPD_for:
8739     case OMPD_parallel_for:
8740     case OMPD_parallel_master:
8741     case OMPD_parallel_sections:
8742     case OMPD_for_simd:
8743     case OMPD_parallel_for_simd:
8744     case OMPD_cancel:
8745     case OMPD_cancellation_point:
8746     case OMPD_ordered:
8747     case OMPD_threadprivate:
8748     case OMPD_allocate:
8749     case OMPD_task:
8750     case OMPD_simd:
8751     case OMPD_sections:
8752     case OMPD_section:
8753     case OMPD_single:
8754     case OMPD_master:
8755     case OMPD_critical:
8756     case OMPD_taskyield:
8757     case OMPD_barrier:
8758     case OMPD_taskwait:
8759     case OMPD_taskgroup:
8760     case OMPD_atomic:
8761     case OMPD_flush:
8762     case OMPD_teams:
8763     case OMPD_target_data:
8764     case OMPD_target_exit_data:
8765     case OMPD_target_enter_data:
8766     case OMPD_distribute:
8767     case OMPD_distribute_simd:
8768     case OMPD_distribute_parallel_for:
8769     case OMPD_distribute_parallel_for_simd:
8770     case OMPD_teams_distribute:
8771     case OMPD_teams_distribute_simd:
8772     case OMPD_teams_distribute_parallel_for:
8773     case OMPD_teams_distribute_parallel_for_simd:
8774     case OMPD_target_update:
8775     case OMPD_declare_simd:
8776     case OMPD_declare_variant:
8777     case OMPD_declare_target:
8778     case OMPD_end_declare_target:
8779     case OMPD_declare_reduction:
8780     case OMPD_declare_mapper:
8781     case OMPD_taskloop:
8782     case OMPD_taskloop_simd:
8783     case OMPD_master_taskloop:
8784     case OMPD_master_taskloop_simd:
8785     case OMPD_parallel_master_taskloop:
8786     case OMPD_parallel_master_taskloop_simd:
8787     case OMPD_requires:
8788     case OMPD_unknown:
8789       llvm_unreachable("Unexpected directive.");
8790     }
8791   }
8792 
8793   return nullptr;
8794 }
8795 
8796 /// Emit the user-defined mapper function. The code generation follows the
8797 /// pattern in the example below.
8798 /// \code
8799 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8800 ///                                           void *base, void *begin,
8801 ///                                           int64_t size, int64_t type) {
8802 ///   // Allocate space for an array section first.
8803 ///   if (size > 1 && !maptype.IsDelete)
8804 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8805 ///                                 size*sizeof(Ty), clearToFrom(type));
8806 ///   // Map members.
8807 ///   for (unsigned i = 0; i < size; i++) {
8808 ///     // For each component specified by this mapper:
8809 ///     for (auto c : all_components) {
8810 ///       if (c.hasMapper())
8811 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8812 ///                       c.arg_type);
8813 ///       else
8814 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8815 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8816 ///     }
8817 ///   }
8818 ///   // Delete the array section.
8819 ///   if (size > 1 && maptype.IsDelete)
8820 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8821 ///                                 size*sizeof(Ty), clearToFrom(type));
8822 /// }
8823 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Codegen each declare-mapper only once; UDMMap caches generated functions.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>.".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied: skip the loop
  // entirely when begin == end.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. PtrPHI tracks the element being mapped; its
  // back-edge incoming value is added after the body is emitted (see below).
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift into the position of the MEMBER_OF bit-field so it can be added to
  // the MEMBER_OF value of each component below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing. The PHI below merges the four cases
    // (the ToElseBB edge is the untouched tofrom map type).
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  // Record the mapper against the current function (if any) so it can be
  // looked up later during that function's codegen.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9073 
9074 /// Emit the array initialization or deletion portion for user-defined mapper
9075 /// code generation. First, it evaluates whether an array section is mapped and
9076 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9077 /// true, and \a MapType indicates to not delete this array, array
9078 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
9080 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9081     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9082     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9083     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9084   StringRef Prefix = IsInit ? ".init" : ".del";
9085 
9086   // Evaluate if this is an array section.
9087   llvm::BasicBlock *IsDeleteBB =
9088       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9089   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9090   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9091       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9092   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9093 
9094   // Evaluate if we are going to delete this section.
9095   MapperCGF.EmitBlock(IsDeleteBB);
9096   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9097       MapType,
9098       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9099   llvm::Value *DeleteCond;
9100   if (IsInit) {
9101     DeleteCond = MapperCGF.Builder.CreateIsNull(
9102         DeleteBit, "omp.array" + Prefix + ".delete");
9103   } else {
9104     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9105         DeleteBit, "omp.array" + Prefix + ".delete");
9106   }
9107   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9108 
9109   MapperCGF.EmitBlock(BodyBB);
9110   // Get the array size by multiplying element size and element number (i.e., \p
9111   // Size).
9112   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9113       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9114   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9115   // memory allocation/deletion purpose only.
9116   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9117       MapType,
9118       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9119                                    MappableExprsHandler::OMP_MAP_FROM)));
9120   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9121   // data structure.
9122   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9123   MapperCGF.EmitRuntimeCall(
9124       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9125 }
9126 
9127 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9128     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9129     llvm::Value *DeviceID,
9130     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9131                                      const OMPLoopDirective &D)>
9132         SizeEmitter) {
9133   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9134   const OMPExecutableDirective *TD = &D;
9135   // Get nested teams distribute kind directive, if any.
9136   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9137     TD = getNestedDistributeDirective(CGM.getContext(), D);
9138   if (!TD)
9139     return;
9140   const auto *LD = cast<OMPLoopDirective>(TD);
9141   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9142                                                      PrePostActionTy &) {
9143     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9144       llvm::Value *Args[] = {DeviceID, NumIterations};
9145       CGF.EmitRuntimeCall(
9146           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9147     }
9148   };
9149   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9150 }
9151 
9152 void CGOpenMPRuntime::emitTargetCall(
9153     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9154     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9155     const Expr *Device,
9156     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9157                                      const OMPLoopDirective &D)>
9158         SizeEmitter) {
9159   if (!CGF.HaveInsertPoint())
9160     return;
9161 
9162   assert(OutlinedFn && "Invalid outlined function!");
9163 
9164   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9165   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9166   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9167   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9168                                             PrePostActionTy &) {
9169     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9170   };
9171   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9172 
9173   CodeGenFunction::OMPTargetDataInfo InputInfo;
9174   llvm::Value *MapTypesArray = nullptr;
9175   // Fill up the pointer arrays and transfer execution to the device.
9176   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9177                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9178                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9179     // On top of the arrays that were filled up, the target offloading call
9180     // takes as arguments the device id as well as the host pointer. The host
9181     // pointer is used by the runtime library to identify the current target
9182     // region, so it only has to be unique and not necessarily point to
9183     // anything. It could be the pointer to the outlined function that
9184     // implements the target region, but we aren't using that so that the
9185     // compiler doesn't need to keep that, and could therefore inline the host
9186     // function if proven worthwhile during optimization.
9187 
9188     // From this point on, we need to have an ID of the target region defined.
9189     assert(OutlinedFnID && "Invalid outlined function ID!");
9190 
9191     // Emit device ID if any.
9192     llvm::Value *DeviceID;
9193     if (Device) {
9194       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9195                                            CGF.Int64Ty, /*isSigned=*/true);
9196     } else {
9197       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9198     }
9199 
9200     // Emit the number of elements in the offloading arrays.
9201     llvm::Value *PointerNum =
9202         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9203 
9204     // Return value of the runtime offloading call.
9205     llvm::Value *Return;
9206 
9207     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9208     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9209 
9210     // Emit tripcount for the target loop-based directive.
9211     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9212 
9213     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9214     // The target region is an outlined function launched by the runtime
9215     // via calls __tgt_target() or __tgt_target_teams().
9216     //
9217     // __tgt_target() launches a target region with one team and one thread,
9218     // executing a serial region.  This master thread may in turn launch
9219     // more threads within its team upon encountering a parallel region,
9220     // however, no additional teams can be launched on the device.
9221     //
9222     // __tgt_target_teams() launches a target region with one or more teams,
9223     // each with one or more threads.  This call is required for target
9224     // constructs such as:
9225     //  'target teams'
9226     //  'target' / 'teams'
9227     //  'target teams distribute parallel for'
9228     //  'target parallel'
9229     // and so on.
9230     //
9231     // Note that on the host and CPU targets, the runtime implementation of
9232     // these calls simply call the outlined function without forking threads.
9233     // The outlined functions themselves have runtime calls to
9234     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9235     // the compiler in emitTeamsCall() and emitParallelCall().
9236     //
9237     // In contrast, on the NVPTX target, the implementation of
9238     // __tgt_target_teams() launches a GPU kernel with the requested number
9239     // of teams and threads so no additional calls to the runtime are required.
9240     if (NumTeams) {
9241       // If we have NumTeams defined this means that we have an enclosed teams
9242       // region. Therefore we also expect to have NumThreads defined. These two
9243       // values should be defined in the presence of a teams directive,
9244       // regardless of having any clauses associated. If the user is using teams
9245       // but no clauses, these two values will be the default that should be
9246       // passed to the runtime library - a 32-bit integer with the value zero.
9247       assert(NumThreads && "Thread limit expression should be available along "
9248                            "with number of teams.");
9249       llvm::Value *OffloadingArgs[] = {DeviceID,
9250                                        OutlinedFnID,
9251                                        PointerNum,
9252                                        InputInfo.BasePointersArray.getPointer(),
9253                                        InputInfo.PointersArray.getPointer(),
9254                                        InputInfo.SizesArray.getPointer(),
9255                                        MapTypesArray,
9256                                        NumTeams,
9257                                        NumThreads};
9258       Return = CGF.EmitRuntimeCall(
9259           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9260                                           : OMPRTL__tgt_target_teams),
9261           OffloadingArgs);
9262     } else {
9263       llvm::Value *OffloadingArgs[] = {DeviceID,
9264                                        OutlinedFnID,
9265                                        PointerNum,
9266                                        InputInfo.BasePointersArray.getPointer(),
9267                                        InputInfo.PointersArray.getPointer(),
9268                                        InputInfo.SizesArray.getPointer(),
9269                                        MapTypesArray};
9270       Return = CGF.EmitRuntimeCall(
9271           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9272                                           : OMPRTL__tgt_target),
9273           OffloadingArgs);
9274     }
9275 
9276     // Check the error code and execute the host version if required.
9277     llvm::BasicBlock *OffloadFailedBlock =
9278         CGF.createBasicBlock("omp_offload.failed");
9279     llvm::BasicBlock *OffloadContBlock =
9280         CGF.createBasicBlock("omp_offload.cont");
9281     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9282     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9283 
9284     CGF.EmitBlock(OffloadFailedBlock);
9285     if (RequiresOuterTask) {
9286       CapturedVars.clear();
9287       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9288     }
9289     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9290     CGF.EmitBranch(OffloadContBlock);
9291 
9292     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9293   };
9294 
9295   // Notify that the host version must be executed.
9296   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9297                     RequiresOuterTask](CodeGenFunction &CGF,
9298                                        PrePostActionTy &) {
9299     if (RequiresOuterTask) {
9300       CapturedVars.clear();
9301       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9302     }
9303     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9304   };
9305 
9306   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9307                           &CapturedVars, RequiresOuterTask,
9308                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9309     // Fill up the arrays with all the captured variables.
9310     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9311     MappableExprsHandler::MapValuesArrayTy Pointers;
9312     MappableExprsHandler::MapValuesArrayTy Sizes;
9313     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9314 
9315     // Get mappable expression information.
9316     MappableExprsHandler MEHandler(D, CGF);
9317     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9318 
9319     auto RI = CS.getCapturedRecordDecl()->field_begin();
9320     auto CV = CapturedVars.begin();
9321     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9322                                               CE = CS.capture_end();
9323          CI != CE; ++CI, ++RI, ++CV) {
9324       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9325       MappableExprsHandler::MapValuesArrayTy CurPointers;
9326       MappableExprsHandler::MapValuesArrayTy CurSizes;
9327       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9328       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9329 
9330       // VLA sizes are passed to the outlined region by copy and do not have map
9331       // information associated.
9332       if (CI->capturesVariableArrayType()) {
9333         CurBasePointers.push_back(*CV);
9334         CurPointers.push_back(*CV);
9335         CurSizes.push_back(CGF.Builder.CreateIntCast(
9336             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9337         // Copy to the device as an argument. No need to retrieve it.
9338         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9339                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9340                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9341       } else {
9342         // If we have any information in the map clause, we use it, otherwise we
9343         // just do a default mapping.
9344         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9345                                          CurSizes, CurMapTypes, PartialStruct);
9346         if (CurBasePointers.empty())
9347           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9348                                            CurPointers, CurSizes, CurMapTypes);
9349         // Generate correct mapping for variables captured by reference in
9350         // lambdas.
9351         if (CI->capturesVariable())
9352           MEHandler.generateInfoForLambdaCaptures(
9353               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9354               CurMapTypes, LambdaPointers);
9355       }
9356       // We expect to have at least an element of information for this capture.
9357       assert(!CurBasePointers.empty() &&
9358              "Non-existing map pointer for capture!");
9359       assert(CurBasePointers.size() == CurPointers.size() &&
9360              CurBasePointers.size() == CurSizes.size() &&
9361              CurBasePointers.size() == CurMapTypes.size() &&
9362              "Inconsistent map information sizes!");
9363 
9364       // If there is an entry in PartialStruct it means we have a struct with
9365       // individual members mapped. Emit an extra combined entry.
9366       if (PartialStruct.Base.isValid())
9367         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9368                                     CurMapTypes, PartialStruct);
9369 
9370       // We need to append the results of this capture to what we already have.
9371       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9372       Pointers.append(CurPointers.begin(), CurPointers.end());
9373       Sizes.append(CurSizes.begin(), CurSizes.end());
9374       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9375     }
9376     // Adjust MEMBER_OF flags for the lambdas captures.
9377     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9378                                               Pointers, MapTypes);
9379     // Map other list items in the map clause which are not captured variables
9380     // but "declare target link" global variables.
9381     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9382                                                MapTypes);
9383 
9384     TargetDataInfo Info;
9385     // Fill up the arrays and create the arguments.
9386     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9387     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9388                                  Info.PointersArray, Info.SizesArray,
9389                                  Info.MapTypesArray, Info);
9390     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9391     InputInfo.BasePointersArray =
9392         Address(Info.BasePointersArray, CGM.getPointerAlign());
9393     InputInfo.PointersArray =
9394         Address(Info.PointersArray, CGM.getPointerAlign());
9395     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9396     MapTypesArray = Info.MapTypesArray;
9397     if (RequiresOuterTask)
9398       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9399     else
9400       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9401   };
9402 
9403   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9404                              CodeGenFunction &CGF, PrePostActionTy &) {
9405     if (RequiresOuterTask) {
9406       CodeGenFunction::OMPTargetDataInfo InputInfo;
9407       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9408     } else {
9409       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9410     }
9411   };
9412 
9413   // If we have a target function ID it means that we need to support
9414   // offloading, otherwise, just execute on the host. We need to execute on host
9415   // regardless of the conditional in the if clause if, e.g., the user do not
9416   // specify target triples.
9417   if (OutlinedFnID) {
9418     if (IfCond) {
9419       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9420     } else {
9421       RegionCodeGenTy ThenRCG(TargetThenGen);
9422       ThenRCG(CGF);
9423     }
9424   } else {
9425     RegionCodeGenTy ElseRCG(TargetElseGen);
9426     ElseRCG(CGF);
9427   }
9428 }
9429 
/// Recursively scans \p S for OpenMP target execution directives and emits the
/// device-side function for each one found. \p ParentName is the mangled name
/// of the enclosing host function (or ctor/dtor), used to build the unique
/// target-region entry name.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple of the region's begin location uniquely
    // identifies the target region entry.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the directives below is a target execution directive, so they
    // cannot reach this switch (guarded by RequiresDeviceCodegen above). The
    // exhaustive list keeps -Wswitch useful when new directives are added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directive: recurse into its associated statement,
  // if it has one.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9573 
9574 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9575   // If emitting code for the host, we do not process FD here. Instead we do
9576   // the normal code generation.
9577   if (!CGM.getLangOpts().OpenMPIsDevice) {
9578     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9579       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9580           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9581       // Do not emit device_type(nohost) functions for the host.
9582       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9583         return true;
9584     }
9585     return false;
9586   }
9587 
9588   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9589   // Try to detect target regions in the function.
9590   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9591     StringRef Name = CGM.getMangledName(GD);
9592     scanForTargetRegionsFunctions(FD->getBody(), Name);
9593     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9594         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9595     // Do not emit device_type(nohost) functions for the host.
9596     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9597       return true;
9598   }
9599 
9600   // Do not to emit function if it is not marked as declare target.
9601   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9602          AlreadyEmittedTargetDecls.count(VD) == 0;
9603 }
9604 
9605 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9606   if (!CGM.getLangOpts().OpenMPIsDevice)
9607     return false;
9608 
9609   // Check if there are Ctors/Dtors in this declaration and look for target
9610   // regions in it. We use the complete variant to produce the kernel name
9611   // mangling.
9612   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9613   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9614     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9615       StringRef ParentName =
9616           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9617       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9618     }
9619     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9620       StringRef ParentName =
9621           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9622       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9623     }
9624   }
9625 
9626   // Do not to emit variable if it is not marked as declare target.
9627   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9628       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9629           cast<VarDecl>(GD.getDecl()));
9630   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9631       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9632        HasRequiresUnifiedSharedMemory)) {
9633     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9634     return true;
9635   }
9636   return false;
9637 }
9638 
9639 llvm::Constant *
9640 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9641                                                 const VarDecl *VD) {
9642   assert(VD->getType().isConstant(CGM.getContext()) &&
9643          "Expected constant variable.");
9644   StringRef VarName;
9645   llvm::Constant *Addr;
9646   llvm::GlobalValue::LinkageTypes Linkage;
9647   QualType Ty = VD->getType();
9648   SmallString<128> Buffer;
9649   {
9650     unsigned DeviceID;
9651     unsigned FileID;
9652     unsigned Line;
9653     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9654                              FileID, Line);
9655     llvm::raw_svector_ostream OS(Buffer);
9656     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9657        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9658     VarName = OS.str();
9659   }
9660   Linkage = llvm::GlobalValue::InternalLinkage;
9661   Addr =
9662       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9663                                   getDefaultFirstprivateAddressSpace());
9664   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9665   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9666   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9667   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9668       VarName, Addr, VarSize,
9669       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9670   return Addr;
9671 }
9672 
9673 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9674                                                    llvm::Constant *Addr) {
9675   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9676       !CGM.getLangOpts().OpenMPIsDevice)
9677     return;
9678   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9679       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9680   if (!Res) {
9681     if (CGM.getLangOpts().OpenMPIsDevice) {
9682       // Register non-target variables being emitted in device code (debug info
9683       // may cause this).
9684       StringRef VarName = CGM.getMangledName(VD);
9685       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9686     }
9687     return;
9688   }
9689   // Register declare target variables.
9690   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9691   StringRef VarName;
9692   CharUnits VarSize;
9693   llvm::GlobalValue::LinkageTypes Linkage;
9694 
9695   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9696       !HasRequiresUnifiedSharedMemory) {
9697     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9698     VarName = CGM.getMangledName(VD);
9699     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9700       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9701       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9702     } else {
9703       VarSize = CharUnits::Zero();
9704     }
9705     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9706     // Temp solution to prevent optimizations of the internal variables.
9707     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9708       std::string RefName = getName({VarName, "ref"});
9709       if (!CGM.GetGlobalValue(RefName)) {
9710         llvm::Constant *AddrRef =
9711             getOrCreateInternalVariable(Addr->getType(), RefName);
9712         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9713         GVAddrRef->setConstant(/*Val=*/true);
9714         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9715         GVAddrRef->setInitializer(Addr);
9716         CGM.addCompilerUsedGlobal(GVAddrRef);
9717       }
9718     }
9719   } else {
9720     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9721             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9722              HasRequiresUnifiedSharedMemory)) &&
9723            "Declare target attribute must link or to with unified memory.");
9724     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9725       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9726     else
9727       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9728 
9729     if (CGM.getLangOpts().OpenMPIsDevice) {
9730       VarName = Addr->getName();
9731       Addr = nullptr;
9732     } else {
9733       VarName = getAddrOfDeclareTargetVar(VD).getName();
9734       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9735     }
9736     VarSize = CGM.getPointerSize();
9737     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9738   }
9739 
9740   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9741       VarName, Addr, VarSize, Flags, Linkage);
9742 }
9743 
9744 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9745   if (isa<FunctionDecl>(GD.getDecl()) ||
9746       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9747     return emitTargetFunctions(GD);
9748 
9749   return emitTargetGlobalVariable(GD);
9750 }
9751 
9752 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9753   for (const VarDecl *VD : DeferredGlobalVariables) {
9754     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9755         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9756     if (!Res)
9757       continue;
9758     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9759         !HasRequiresUnifiedSharedMemory) {
9760       CGM.EmitGlobal(VD);
9761     } else {
9762       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9763               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9764                HasRequiresUnifiedSharedMemory)) &&
9765              "Expected link clause or to clause with unified memory.");
9766       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9767     }
9768   }
9769 }
9770 
9771 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9772     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9773   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9774          " Expected target-based directive.");
9775 }
9776 
9777 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9778     const OMPRequiresDecl *D) {
9779   for (const OMPClause *Clause : D->clauselists()) {
9780     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9781       HasRequiresUnifiedSharedMemory = true;
9782       break;
9783     }
9784   }
9785 }
9786 
/// Checks whether global variable \p VD carries an OpenMP 'allocate'
/// attribute with a predefined allocator; if so, reports the language address
/// space the variable should be emitted in via \p AS and returns true.
/// Returns false when there is no allocate attribute.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  // All predefined allocators fall through to the default address space;
  // memory-space-specific placement is not implemented here.
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}
9810 
/// Returns true if a 'requires unified_shared_memory' clause was seen in this
/// compilation (recorded by checkArchForUnifiedAddressing).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9814 
9815 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9816     CodeGenModule &CGM)
9817     : CGM(CGM) {
9818   if (CGM.getLangOpts().OpenMPIsDevice) {
9819     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9820     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9821   }
9822 }
9823 
9824 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9825   if (CGM.getLangOpts().OpenMPIsDevice)
9826     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9827 }
9828 
9829 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9830   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9831     return true;
9832 
9833   const auto *D = cast<FunctionDecl>(GD.getDecl());
9834   // Do not to emit function if it is marked as declare target as it was already
9835   // emitted.
9836   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9837     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9838       if (auto *F = dyn_cast_or_null<llvm::Function>(
9839               CGM.GetGlobalValue(CGM.getMangledName(GD))))
9840         return !F->isDeclaration();
9841       return false;
9842     }
9843     return true;
9844   }
9845 
9846   return !AlreadyEmittedTargetDecls.insert(D).second;
9847 }
9848 
/// Creates the ctor-like function that reports the flags of any 'requires'
/// directives to the runtime via __tgt_register_requires. Returns nullptr
/// when no registration is needed (no offload targets, simd-only mode,
/// device compilation, or no target entries in this translation unit).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Emit a void() function whose sole job is the runtime call below.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9889 
9890 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9891                                     const OMPExecutableDirective &D,
9892                                     SourceLocation Loc,
9893                                     llvm::Function *OutlinedFn,
9894                                     ArrayRef<llvm::Value *> CapturedVars) {
9895   if (!CGF.HaveInsertPoint())
9896     return;
9897 
9898   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9899   CodeGenFunction::RunCleanupsScope Scope(CGF);
9900 
9901   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9902   llvm::Value *Args[] = {
9903       RTLoc,
9904       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9905       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9906   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9907   RealArgs.append(std::begin(Args), std::end(Args));
9908   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9909 
9910   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9911   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9912 }
9913 
9914 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9915                                          const Expr *NumTeams,
9916                                          const Expr *ThreadLimit,
9917                                          SourceLocation Loc) {
9918   if (!CGF.HaveInsertPoint())
9919     return;
9920 
9921   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9922 
9923   llvm::Value *NumTeamsVal =
9924       NumTeams
9925           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9926                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9927           : CGF.Builder.getInt32(0);
9928 
9929   llvm::Value *ThreadLimitVal =
9930       ThreadLimit
9931           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9932                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9933           : CGF.Builder.getInt32(0);
9934 
9935   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9936   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9937                                      ThreadLimitVal};
9938   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9939                       PushNumTeamsArgs);
9940 }
9941 
// Emits the __tgt_target_data_begin/__tgt_target_data_end bracket around a
// target data region, plus the region body itself, honoring the 'if' and
// 'device' clauses. The offloading arrays live in \p Info so they can be
// shared between the opening and the closing runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments. This populates Info, which
    // the closing lambda below reads back.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region. Relies on Info having
  // been filled in by BeginThenGen.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10068 
// Emits the single runtime call for a standalone target data directive
// ('target enter data', 'target exit data', or 'target update'), honoring the
// 'if', 'device', 'nowait', and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled in by TargetThenGen (below) before
  // ThenGen runs; ThenGen therefore captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds are listed explicitly (instead of a
    // 'default' label) so that adding a new OpenMP directive forces a
    // -Wswitch diagnostic here; none of them can reach this function, as the
    // assertion at the top of the function enforces.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses and then emits ThenGen,
  // either inlined or wrapped in a task when a 'depend' clause is present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause, nothing is emitted when the condition is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10227 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// OpenMP classification of the parameter; defaults to vector.
    ParamKindTy Kind = Vector;
    /// Linear step or argument value, depending on Kind; mangled into the
    /// vector name only when non-zero (and, for 'l', when != 1).
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; appended to the mangled name as
    /// 'a<value>' only when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
10238 
10239 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10240                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10241   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10242   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10243   // of that clause. The VLEN value must be power of 2.
10244   // In other case the notion of the function`s "characteristic data type" (CDT)
10245   // is used to compute the vector length.
10246   // CDT is defined in the following order:
10247   //   a) For non-void function, the CDT is the return type.
10248   //   b) If the function has any non-uniform, non-linear parameters, then the
10249   //   CDT is the type of the first such parameter.
10250   //   c) If the CDT determined by a) or b) above is struct, union, or class
10251   //   type which is pass-by-value (except for the type that maps to the
10252   //   built-in complex data type), the characteristic data type is int.
10253   //   d) If none of the above three cases is applicable, the CDT is int.
10254   // The VLEN is then determined based on the CDT and the size of vector
10255   // register of that ISA for which current vector version is generated. The
10256   // VLEN is computed using the formula below:
10257   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10258   // where vector register size specified in section 3.2.1 Registers and the
10259   // Stack Frame of original AMD64 ABI document.
10260   QualType RetType = FD->getReturnType();
10261   if (RetType.isNull())
10262     return 0;
10263   ASTContext &C = FD->getASTContext();
10264   QualType CDT;
10265   if (!RetType.isNull() && !RetType->isVoidType()) {
10266     CDT = RetType;
10267   } else {
10268     unsigned Offset = 0;
10269     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10270       if (ParamAttrs[Offset].Kind == Vector)
10271         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10272       ++Offset;
10273     }
10274     if (CDT.isNull()) {
10275       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10276         if (ParamAttrs[I + Offset].Kind == Vector) {
10277           CDT = FD->getParamDecl(I)->getType();
10278           break;
10279         }
10280       }
10281     }
10282   }
10283   if (CDT.isNull())
10284     CDT = C.IntTy;
10285   CDT = CDT->getCanonicalTypeUnqualified();
10286   if (CDT->isRecordType() || CDT->isUnionType())
10287     CDT = C.IntTy;
10288   return C.getTypeSize(CDT);
10289 }
10290 
10291 static void
10292 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10293                            const llvm::APSInt &VLENVal,
10294                            ArrayRef<ParamAttrTy> ParamAttrs,
10295                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10296   struct ISADataTy {
10297     char ISA;
10298     unsigned VecRegSize;
10299   };
10300   ISADataTy ISAData[] = {
10301       {
10302           'b', 128
10303       }, // SSE
10304       {
10305           'c', 256
10306       }, // AVX
10307       {
10308           'd', 256
10309       }, // AVX2
10310       {
10311           'e', 512
10312       }, // AVX512
10313   };
10314   llvm::SmallVector<char, 2> Masked;
10315   switch (State) {
10316   case OMPDeclareSimdDeclAttr::BS_Undefined:
10317     Masked.push_back('N');
10318     Masked.push_back('M');
10319     break;
10320   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10321     Masked.push_back('N');
10322     break;
10323   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10324     Masked.push_back('M');
10325     break;
10326   }
10327   for (char Mask : Masked) {
10328     for (const ISADataTy &Data : ISAData) {
10329       SmallString<256> Buffer;
10330       llvm::raw_svector_ostream Out(Buffer);
10331       Out << "_ZGV" << Data.ISA << Mask;
10332       if (!VLENVal) {
10333         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10334         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10335         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10336       } else {
10337         Out << VLENVal;
10338       }
10339       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10340         switch (ParamAttr.Kind){
10341         case LinearWithVarStride:
10342           Out << 's' << ParamAttr.StrideOrArg;
10343           break;
10344         case Linear:
10345           Out << 'l';
10346           if (!!ParamAttr.StrideOrArg)
10347             Out << ParamAttr.StrideOrArg;
10348           break;
10349         case Uniform:
10350           Out << 'u';
10351           break;
10352         case Vector:
10353           Out << 'v';
10354           break;
10355         }
10356         if (!!ParamAttr.Alignment)
10357           Out << 'a' << ParamAttr.Alignment;
10358       }
10359       Out << '_' << Fn->getName();
10360       Fn->addFnAttr(Out.str());
10361     }
10362   }
10363 }
10364 
10365 // This are the Functions that are needed to mangle the name of the
10366 // vector functions generated by the compiler, according to the rules
10367 // defined in the "Vector Function ABI specifications for AArch64",
10368 // available at
10369 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10370 
10371 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10372 ///
10373 /// TODO: Need to implement the behavior for reference marked with a
10374 /// var or no linear modifiers (1.b in the section). For this, we
10375 /// need to extend ParamKindTy to support the linear modifiers.
10376 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10377   QT = QT.getCanonicalType();
10378 
10379   if (QT->isVoidType())
10380     return false;
10381 
10382   if (Kind == ParamKindTy::Uniform)
10383     return false;
10384 
10385   if (Kind == ParamKindTy::Linear)
10386     return false;
10387 
10388   // TODO: Handle linear references with modifiers
10389 
10390   if (Kind == ParamKindTy::LinearWithVarStride)
10391     return false;
10392 
10393   return true;
10394 }
10395 
10396 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10397 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10398   QT = QT.getCanonicalType();
10399   unsigned Size = C.getTypeSize(QT);
10400 
10401   // Only scalars and complex within 16 bytes wide set PVB to true.
10402   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10403     return false;
10404 
10405   if (QT->isFloatingType())
10406     return true;
10407 
10408   if (QT->isIntegerType())
10409     return true;
10410 
10411   if (QT->isPointerType())
10412     return true;
10413 
10414   // TODO: Add support for complex types (section 3.1.2, item 2).
10415 
10416   return false;
10417 }
10418 
10419 /// Computes the lane size (LS) of a return type or of an input parameter,
10420 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10421 /// TODO: Add support for references, section 3.2.1, item 1.
10422 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10423   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10424     QualType PTy = QT.getCanonicalType()->getPointeeType();
10425     if (getAArch64PBV(PTy, C))
10426       return C.getTypeSize(PTy);
10427   }
10428   if (getAArch64PBV(QT, C))
10429     return C.getTypeSize(QT);
10430 
10431   return C.getTypeSize(C.getUIntPtrType());
10432 }
10433 
10434 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10435 // signature of the scalar function, as defined in 3.2.2 of the
10436 // AAVFABI.
10437 static std::tuple<unsigned, unsigned, bool>
10438 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10439   QualType RetType = FD->getReturnType().getCanonicalType();
10440 
10441   ASTContext &C = FD->getASTContext();
10442 
10443   bool OutputBecomesInput = false;
10444 
10445   llvm::SmallVector<unsigned, 8> Sizes;
10446   if (!RetType->isVoidType()) {
10447     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10448     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10449       OutputBecomesInput = true;
10450   }
10451   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10452     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10453     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10454   }
10455 
10456   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10457   // The LS of a function parameter / return value can only be a power
10458   // of 2, starting from 8 bits, up to 128.
10459   assert(std::all_of(Sizes.begin(), Sizes.end(),
10460                      [](unsigned Size) {
10461                        return Size == 8 || Size == 16 || Size == 32 ||
10462                               Size == 64 || Size == 128;
10463                      }) &&
10464          "Invalid size");
10465 
10466   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10467                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10468                          OutputBecomesInput);
10469 }
10470 
10471 /// Mangle the parameter part of the vector function name according to
10472 /// their OpenMP classification. The mangling function is defined in
10473 /// section 3.5 of the AAVFABI.
10474 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10475   SmallString<256> Buffer;
10476   llvm::raw_svector_ostream Out(Buffer);
10477   for (const auto &ParamAttr : ParamAttrs) {
10478     switch (ParamAttr.Kind) {
10479     case LinearWithVarStride:
10480       Out << "ls" << ParamAttr.StrideOrArg;
10481       break;
10482     case Linear:
10483       Out << 'l';
10484       // Don't print the step value if it is not present or if it is
10485       // equal to 1.
10486       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10487         Out << ParamAttr.StrideOrArg;
10488       break;
10489     case Uniform:
10490       Out << 'u';
10491       break;
10492     case Vector:
10493       Out << 'v';
10494       break;
10495     }
10496 
10497     if (!!ParamAttr.Alignment)
10498       Out << 'a' << ParamAttr.Alignment;
10499   }
10500 
10501   return Out.str();
10502 }
10503 
10504 // Function used to add the attribute. The parameter `VLEN` is
10505 // templated to allow the use of "x" when targeting scalable functions
10506 // for SVE.
10507 template <typename T>
10508 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10509                                  char ISA, StringRef ParSeq,
10510                                  StringRef MangledName, bool OutputBecomesInput,
10511                                  llvm::Function *Fn) {
10512   SmallString<256> Buffer;
10513   llvm::raw_svector_ostream Out(Buffer);
10514   Out << Prefix << ISA << LMask << VLEN;
10515   if (OutputBecomesInput)
10516     Out << "v";
10517   Out << ParSeq << "_" << MangledName;
10518   Fn->addFnAttr(Out.str());
10519 }
10520 
10521 // Helper function to generate the Advanced SIMD names depending on
10522 // the value of the NDS when simdlen is not present.
10523 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10524                                       StringRef Prefix, char ISA,
10525                                       StringRef ParSeq, StringRef MangledName,
10526                                       bool OutputBecomesInput,
10527                                       llvm::Function *Fn) {
10528   switch (NDS) {
10529   case 8:
10530     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10531                          OutputBecomesInput, Fn);
10532     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10533                          OutputBecomesInput, Fn);
10534     break;
10535   case 16:
10536     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10537                          OutputBecomesInput, Fn);
10538     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10539                          OutputBecomesInput, Fn);
10540     break;
10541   case 32:
10542     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10543                          OutputBecomesInput, Fn);
10544     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10545                          OutputBecomesInput, Fn);
10546     break;
10547   case 64:
10548   case 128:
10549     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10550                          OutputBecomesInput, Fn);
10551     break;
10552   default:
10553     llvm_unreachable("Scalar type is too wide.");
10554   }
10555 }
10556 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates a user-provided 'simdlen' (\p UserVLEN, 0 when absent) against
/// the target ISA ('n' for Advanced SIMD, 's' for SVE), emitting a warning
/// and bailing out when it is unusable; otherwise adds the mangled vector
/// variant names to \p Fn.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits (128-bit multiples, at most 2048 bits total).
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: "x" marks a scalable vector length.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10665 
10666 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10667                                               llvm::Function *Fn) {
10668   ASTContext &C = CGM.getContext();
10669   FD = FD->getMostRecentDecl();
10670   // Map params to their positions in function decl.
10671   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10672   if (isa<CXXMethodDecl>(FD))
10673     ParamPositions.try_emplace(FD, 0);
10674   unsigned ParamPos = ParamPositions.size();
10675   for (const ParmVarDecl *P : FD->parameters()) {
10676     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10677     ++ParamPos;
10678   }
10679   while (FD) {
10680     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10681       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10682       // Mark uniform parameters.
10683       for (const Expr *E : Attr->uniforms()) {
10684         E = E->IgnoreParenImpCasts();
10685         unsigned Pos;
10686         if (isa<CXXThisExpr>(E)) {
10687           Pos = ParamPositions[FD];
10688         } else {
10689           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10690                                 ->getCanonicalDecl();
10691           Pos = ParamPositions[PVD];
10692         }
10693         ParamAttrs[Pos].Kind = Uniform;
10694       }
10695       // Get alignment info.
10696       auto NI = Attr->alignments_begin();
10697       for (const Expr *E : Attr->aligneds()) {
10698         E = E->IgnoreParenImpCasts();
10699         unsigned Pos;
10700         QualType ParmTy;
10701         if (isa<CXXThisExpr>(E)) {
10702           Pos = ParamPositions[FD];
10703           ParmTy = E->getType();
10704         } else {
10705           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10706                                 ->getCanonicalDecl();
10707           Pos = ParamPositions[PVD];
10708           ParmTy = PVD->getType();
10709         }
10710         ParamAttrs[Pos].Alignment =
10711             (*NI)
10712                 ? (*NI)->EvaluateKnownConstInt(C)
10713                 : llvm::APSInt::getUnsigned(
10714                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10715                           .getQuantity());
10716         ++NI;
10717       }
10718       // Mark linear parameters.
10719       auto SI = Attr->steps_begin();
10720       auto MI = Attr->modifiers_begin();
10721       for (const Expr *E : Attr->linears()) {
10722         E = E->IgnoreParenImpCasts();
10723         unsigned Pos;
10724         if (isa<CXXThisExpr>(E)) {
10725           Pos = ParamPositions[FD];
10726         } else {
10727           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10728                                 ->getCanonicalDecl();
10729           Pos = ParamPositions[PVD];
10730         }
10731         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10732         ParamAttr.Kind = Linear;
10733         if (*SI) {
10734           Expr::EvalResult Result;
10735           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10736             if (const auto *DRE =
10737                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10738               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10739                 ParamAttr.Kind = LinearWithVarStride;
10740                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10741                     ParamPositions[StridePVD->getCanonicalDecl()]);
10742               }
10743             }
10744           } else {
10745             ParamAttr.StrideOrArg = Result.Val.getInt();
10746           }
10747         }
10748         ++SI;
10749         ++MI;
10750       }
10751       llvm::APSInt VLENVal;
10752       SourceLocation ExprLoc;
10753       const Expr *VLENExpr = Attr->getSimdlen();
10754       if (VLENExpr) {
10755         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10756         ExprLoc = VLENExpr->getExprLoc();
10757       }
10758       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10759       if (CGM.getTriple().isX86()) {
10760         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10761       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10762         unsigned VLEN = VLENVal.getExtValue();
10763         StringRef MangledName = Fn->getName();
10764         if (CGM.getTarget().hasFeature("sve"))
10765           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10766                                          MangledName, 's', 128, Fn, ExprLoc);
10767         if (CGM.getTarget().hasFeature("neon"))
10768           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10769                                          MangledName, 'n', 128, Fn, ExprLoc);
10770       }
10771     }
10772     FD = FD->getPreviousDecl();
10773   }
10774 }
10775 
10776 namespace {
10777 /// Cleanup action for doacross support.
10778 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10779 public:
10780   static const int DoacrossFinArgs = 2;
10781 
10782 private:
10783   llvm::FunctionCallee RTLFn;
10784   llvm::Value *Args[DoacrossFinArgs];
10785 
10786 public:
10787   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10788                     ArrayRef<llvm::Value *> CallArgs)
10789       : RTLFn(RTLFn) {
10790     assert(CallArgs.size() == DoacrossFinArgs);
10791     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10792   }
10793   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10794     if (!CGF.HaveInsertPoint())
10795       return;
10796     CGF.EmitRuntimeCall(RTLFn, Args);
10797   }
10798 };
10799 } // namespace
10800 
/// Emits the __kmpc_doacross_init runtime call for a loop directive with
/// doacross (ordered) dependences, passing one kmp_dim descriptor per loop
/// dimension, and pushes a cleanup that emits the matching
/// __kmpc_doacross_fini at region exit.
/// \param NumIterations One expression per dimension giving that dimension's
/// iteration count.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The record type was built on a previous call; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // Allocate and zero-initialize a local array kmp_dim dims[num_dims].
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data; 'lo' stays 0 from the null-initialization above.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 before storing.
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) for region exit (normal and EH).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10872 
/// Emits the runtime call for '#pragma omp ordered depend(source|sink : ...)'
/// inside a doacross loop nest: __kmpc_doacross_post for 'source' and
/// __kmpc_doacross_wait for 'sink'.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  // Build a temporary kmp_int64 vector holding the loop counter value for
  // every dimension of the doacross nest.
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Convert each counter to kmp_int64 before storing it into the vector.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Build call __kmpc_doacross_post/wait(ident_t *loc, kmp_int32 gtid,
  // kmp_int64 *vec);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
10903 
10904 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10905                                llvm::FunctionCallee Callee,
10906                                ArrayRef<llvm::Value *> Args) const {
10907   assert(Loc.isValid() && "Outlined function call location must be valid.");
10908   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10909 
10910   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10911     if (Fn->doesNotThrow()) {
10912       CGF.EmitNounwindRuntimeCall(Fn, Args);
10913       return;
10914     }
10915   }
10916   CGF.EmitRuntimeCall(Callee, Args);
10917 }
10918 
/// Emits a call to the outlined OpenMP region function \p OutlinedFn.
/// The base implementation just forwards to emitCall; device runtimes
/// override this to adjust the arguments for the target.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10924 
10925 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10926   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10927     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10928       HasEmittedDeclareTargetRegion = true;
10929 }
10930 
/// Returns the address of the native parameter. The host implementation
/// ignores \p TargetParam and simply returns the local address of
/// \p NativeParam; device runtimes override this to map translated
/// parameters back to their native representation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10936 
namespace {
/// Cleanup action for allocate support.
/// Captures the arguments of the __kmpc_free call (gtid, address, allocator)
/// and emits it when the variable's scope is exited, on both the normal and
/// the EH path.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insert point means the region was terminated; nothing to emit.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
10962 
/// Returns the address of a local variable declared with '#pragma omp
/// allocate' using a non-default allocator: emits a __kmpc_alloc call and
/// registers a cleanup that calls __kmpc_free at scope exit. Returns an
/// invalid Address when normal stack allocation should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is only known at runtime, so round it up with IR math.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-sized type: round up to the alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *__kmpc_alloc(kmp_int32 gtid, size_t sz, omp_allocator_handle_t al);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Schedule __kmpc_free(gtid, addr, allocator) for scope exit.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw allocation (void*) to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
11016 
namespace {
/// One context selector from a 'declare variant' match clause: the selector
/// set/kind, the associated name list and the user-specified score.
using OMPContextSelectorData =
    OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
/// All context selectors collected from a single match clause.
using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
} // anonymous namespace
11022 
/// Checks current context and returns true if it matches the context selector.
/// Primary template: serves as the fallback for any selector-set/selector
/// combination without an explicit specialization and always reports
/// "no match".
template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
          typename... Arguments>
static bool checkContext(const OMPContextSelectorData &Data,
                         Arguments... Params) {
  assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
         "Unknown context selector or context selector set.");
  return false;
}
11032 
11033 /// Checks for implementation={vendor(<vendor>)} context selector.
11034 /// \returns true iff <vendor>="llvm", false otherwise.
11035 template <>
11036 bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
11037     const OMPContextSelectorData &Data) {
11038   return llvm::all_of(Data.Names,
11039                       [](StringRef S) { return !S.compare_lower("llvm"); });
11040 }
11041 
/// Checks for device={kind(<kind>)} context selector.
/// \returns true if <kind>="host" and compilation is for host.
/// true if <kind>="nohost" and compilation is for device.
/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
/// false otherwise.
template <>
bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
    const OMPContextSelectorData &Data, CodeGenModule &CGM) {
  // Every listed kind must match; the first mismatch rejects the selector.
  for (StringRef Name : Data.Names) {
    if (!Name.compare_lower("host")) {
      if (CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    if (!Name.compare_lower("nohost")) {
      if (!CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    switch (CGM.getTriple().getArch()) {
    // Architectures classified as "cpu".
    case llvm::Triple::arm:
    case llvm::Triple::armeb:
    case llvm::Triple::aarch64:
    case llvm::Triple::aarch64_be:
    case llvm::Triple::aarch64_32:
    case llvm::Triple::ppc:
    case llvm::Triple::ppc64:
    case llvm::Triple::ppc64le:
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      if (Name.compare_lower("cpu"))
        return false;
      break;
    // Architectures classified as "gpu".
    case llvm::Triple::amdgcn:
    case llvm::Triple::nvptx:
    case llvm::Triple::nvptx64:
      if (Name.compare_lower("gpu"))
        return false;
      break;
    // All remaining architectures match neither "cpu" nor "gpu".
    case llvm::Triple::UnknownArch:
    case llvm::Triple::arc:
    case llvm::Triple::avr:
    case llvm::Triple::bpfel:
    case llvm::Triple::bpfeb:
    case llvm::Triple::hexagon:
    case llvm::Triple::mips:
    case llvm::Triple::mipsel:
    case llvm::Triple::mips64:
    case llvm::Triple::mips64el:
    case llvm::Triple::msp430:
    case llvm::Triple::r600:
    case llvm::Triple::riscv32:
    case llvm::Triple::riscv64:
    case llvm::Triple::sparc:
    case llvm::Triple::sparcv9:
    case llvm::Triple::sparcel:
    case llvm::Triple::systemz:
    case llvm::Triple::tce:
    case llvm::Triple::tcele:
    case llvm::Triple::thumb:
    case llvm::Triple::thumbeb:
    case llvm::Triple::xcore:
    case llvm::Triple::le32:
    case llvm::Triple::le64:
    case llvm::Triple::amdil:
    case llvm::Triple::amdil64:
    case llvm::Triple::hsail:
    case llvm::Triple::hsail64:
    case llvm::Triple::spir:
    case llvm::Triple::spir64:
    case llvm::Triple::kalimba:
    case llvm::Triple::shave:
    case llvm::Triple::lanai:
    case llvm::Triple::wasm32:
    case llvm::Triple::wasm64:
    case llvm::Triple::renderscript32:
    case llvm::Triple::renderscript64:
    case llvm::Triple::ve:
      return false;
    }
  }
  return true;
}
11126 
11127 static bool matchesContext(CodeGenModule &CGM,
11128                            const CompleteOMPContextSelectorData &ContextData) {
11129   for (const OMPContextSelectorData &Data : ContextData) {
11130     switch (Data.Ctx) {
11131     case OMP_CTX_vendor:
11132       assert(Data.CtxSet == OMP_CTX_SET_implementation &&
11133              "Expected implementation context selector set.");
11134       if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
11135         return false;
11136       break;
11137     case OMP_CTX_kind:
11138       assert(Data.CtxSet == OMP_CTX_SET_device &&
11139              "Expected device context selector set.");
11140       if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
11141                                                                            CGM))
11142         return false;
11143       break;
11144     case OMP_CTX_unknown:
11145       llvm_unreachable("Unknown context selector kind.");
11146     }
11147   }
11148   return true;
11149 }
11150 
11151 static CompleteOMPContextSelectorData
11152 translateAttrToContextSelectorData(ASTContext &C,
11153                                    const OMPDeclareVariantAttr *A) {
11154   CompleteOMPContextSelectorData Data;
11155   for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
11156     Data.emplace_back();
11157     auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
11158         *std::next(A->ctxSelectorSets_begin(), I));
11159     auto Ctx = static_cast<OpenMPContextSelectorKind>(
11160         *std::next(A->ctxSelectors_begin(), I));
11161     Data.back().CtxSet = CtxSet;
11162     Data.back().Ctx = Ctx;
11163     const Expr *Score = *std::next(A->scores_begin(), I);
11164     Data.back().Score = Score->EvaluateKnownConstInt(C);
11165     switch (Ctx) {
11166     case OMP_CTX_vendor:
11167       assert(CtxSet == OMP_CTX_SET_implementation &&
11168              "Expected implementation context selector set.");
11169       Data.back().Names =
11170           llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
11171       break;
11172     case OMP_CTX_kind:
11173       assert(CtxSet == OMP_CTX_SET_device &&
11174              "Expected device context selector set.");
11175       Data.back().Names =
11176           llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
11177       break;
11178     case OMP_CTX_unknown:
11179       llvm_unreachable("Unknown context selector kind.");
11180     }
11181   }
11182   return Data;
11183 }
11184 
/// Returns true if \p LHS is a strict subset of \p RHS: every (set, selector)
/// entry of LHS exists in RHS with all of its names, and LHS is not equal to
/// RHS.
static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
                           const CompleteOMPContextSelectorData &RHS) {
  // Index RHS name lists by their (selector set, selector) pair.
  llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
  for (const OMPContextSelectorData &D : RHS) {
    auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
    Pair.getSecond().insert(D.Names.begin(), D.Names.end());
  }
  bool AllSetsAreEqual = true;
  for (const OMPContextSelectorData &D : LHS) {
    auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
    // RHS has no entry for this (set, selector) pair - not a subset.
    if (It == RHSData.end())
      return false;
    if (D.Names.size() > It->getSecond().size())
      return false;
    // set_union returns true if it inserted something, i.e. LHS has a name
    // that RHS lacked - not a subset then.
    if (llvm::set_union(It->getSecond(), D.Names))
      return false;
    AllSetsAreEqual =
        AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
  }

  // Strictness: LHS must differ from RHS in selector count or name sets.
  return LHS.size() != RHS.size() || !AllSetsAreEqual;
}
11207 
11208 static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
11209                             const CompleteOMPContextSelectorData &RHS) {
11210   // Score is calculated as sum of all scores + 1.
11211   llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11212   bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
11213   if (RHSIsSubsetOfLHS) {
11214     LHSScore = llvm::APSInt::get(0);
11215   } else {
11216     for (const OMPContextSelectorData &Data : LHS) {
11217       if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
11218         LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11219       } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
11220         LHSScore += Data.Score.extend(LHSScore.getBitWidth());
11221       } else {
11222         LHSScore += Data.Score;
11223       }
11224     }
11225   }
11226   llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11227   if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
11228     RHSScore = llvm::APSInt::get(0);
11229   } else {
11230     for (const OMPContextSelectorData &Data : RHS) {
11231       if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
11232         RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11233       } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
11234         RHSScore += Data.Score.extend(RHSScore.getBitWidth());
11235       } else {
11236         RHSScore += Data.Score;
11237       }
11238     }
11239   }
11240   return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
11241 }
11242 
/// Finds the variant function that matches current context with its context
/// selector.
/// \returns the variant FunctionDecl of the highest-scoring matching
/// 'declare variant' attribute, or \p FD itself if none matches.
static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
                                                     const FunctionDecl *FD) {
  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
    return FD;
  // Iterate through all DeclareVariant attributes and check context selectors.
  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
  CompleteOMPContextSelectorData TopMostData;
  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
    CompleteOMPContextSelectorData Data =
        translateAttrToContextSelectorData(CGM.getContext(), A);
    if (!matchesContext(CGM, Data))
      continue;
    // If the attribute matches the context, find the attribute with the highest
    // score.
    if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
      TopMostAttr = A;
      // Keep the winning attribute's selector data for later comparisons.
      TopMostData.swap(Data);
    }
  }
  if (!TopMostAttr)
    return FD;
  // The variant function reference is a DeclRefExpr to the variant decl.
  return cast<FunctionDecl>(
      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
          ->getDecl());
}
11270 
/// Tries to emit the declare-variant replacement for \p GD.
/// \returns true if a variant was selected for emission (possibly deferred),
/// false if the original function should be emitted instead.
bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // If the original function is defined already, use its definition.
  StringRef MangledName = CGM.getMangledName(GD);
  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
  if (Orig && !Orig->isDeclaration())
    return false;
  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
  // Emit original function if it does not have declare variant attribute or the
  // context does not match.
  if (NewFD == D)
    return false;
  GlobalDecl NewGD = GD.getWithDecl(NewFD);
  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
    // Emission succeeded; drop any previously deferred entry.
    DeferredVariantFunction.erase(D);
    return true;
  }
  // Could not emit yet (e.g. variant not seen); remember it for later.
  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
  return true;
}
11291 
/// RAII that collects the declarations named in 'nontemporal' clauses of \p S
/// and pushes them onto the runtime's nontemporal-decls stack for the
/// directive's lifetime. Pushes nothing if the directive has no such clause.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        // Plain variable reference.
        VD = DRE->getDecl();
      } else {
        // Otherwise it must be a member of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11317 
11318 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11319   if (!NeedToPush)
11320     return;
11321   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11322 }
11323 
11324 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11325   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11326 
11327   return llvm::any_of(
11328       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11329       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11330 }
11331 
/// RAII that registers the variables from 'lastprivate(conditional:...)'
/// clauses of \p S on the runtime's lastprivate-conditional stack, together
/// with the loop iteration variable \p IVLVal. Pushes nothing if the
/// directive has no conditional lastprivate clause.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                              [](const OMPLastprivateClause *C) {
                                return C->getKind() ==
                                       OMPC_LASTPRIVATE_conditional;
                              })) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Assign each conditional lastprivate variable a unique global name.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqeName.try_emplace(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          generateUniqueName(CGM, "pl_cond", Ref));
    }
  }
  Data.IVLVal = IVLVal;
  // In simd only mode or for simd directives no need to generate threadprivate
  // references for the loop iteration counter, we can use the original one
  // since outlining cannot happen in simd regions.
  if (CGF.getLangOpts().OpenMPSimd ||
      isOpenMPSimdDirective(S.getDirectiveKind())) {
    Data.UseOriginalIV = true;
    return;
  }
  // Build a unique name for the global iteration counter based on the file's
  // unique ID and the directive's source position.
  llvm::SmallString<16> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  PresumedLoc PLoc =
      CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();
  OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
     << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
  Data.IVName = OS.str();
}
11378 
11379 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11380   if (!NeedToPush)
11381     return;
11382   CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11383 }
11384 
11385 void CGOpenMPRuntime::initLastprivateConditionalCounter(
11386     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11387   if (CGM.getLangOpts().OpenMPSimd ||
11388       !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11389                     [](const OMPLastprivateClause *C) {
11390                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
11391                     }))
11392     return;
11393   const CGOpenMPRuntime::LastprivateConditionalData &Data =
11394       LastprivateConditionalStack.back();
11395   if (Data.UseOriginalIV)
11396     return;
11397   // Global loop counter. Required to handle inner parallel-for regions.
11398   // global_iv = iv;
11399   Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
11400       CGF, Data.IVLVal.getType(), Data.IVName);
11401   LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
11402   llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
11403   CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
11404 }
11405 
11406 namespace {
11407 /// Checks if the lastprivate conditional variable is referenced in LHS.
11408 class LastprivateConditionalRefChecker final
11409     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11410   CodeGenFunction &CGF;
11411   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11412   const Expr *FoundE = nullptr;
11413   const Decl *FoundD = nullptr;
11414   StringRef UniqueDeclName;
11415   LValue IVLVal;
11416   StringRef IVName;
11417   SourceLocation Loc;
11418   bool UseOriginalIV = false;
11419 
11420 public:
11421   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11422     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11423          llvm::reverse(LPM)) {
11424       auto It = D.DeclToUniqeName.find(E->getDecl());
11425       if (It == D.DeclToUniqeName.end())
11426         continue;
11427       FoundE = E;
11428       FoundD = E->getDecl()->getCanonicalDecl();
11429       UniqueDeclName = It->getSecond();
11430       IVLVal = D.IVLVal;
11431       IVName = D.IVName;
11432       UseOriginalIV = D.UseOriginalIV;
11433       break;
11434     }
11435     return FoundE == E;
11436   }
11437   bool VisitMemberExpr(const MemberExpr *E) {
11438     if (!CGF.IsWrappedCXXThis(E->getBase()))
11439       return false;
11440     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11441          llvm::reverse(LPM)) {
11442       auto It = D.DeclToUniqeName.find(E->getMemberDecl());
11443       if (It == D.DeclToUniqeName.end())
11444         continue;
11445       FoundE = E;
11446       FoundD = E->getMemberDecl()->getCanonicalDecl();
11447       UniqueDeclName = It->getSecond();
11448       IVLVal = D.IVLVal;
11449       IVName = D.IVName;
11450       UseOriginalIV = D.UseOriginalIV;
11451       break;
11452     }
11453     return FoundE == E;
11454   }
11455   bool VisitStmt(const Stmt *S) {
11456     for (const Stmt *Child : S->children()) {
11457       if (!Child)
11458         continue;
11459       if (const auto *E = dyn_cast<Expr>(Child))
11460         if (!E->isGLValue())
11461           continue;
11462       if (Visit(Child))
11463         return true;
11464     }
11465     return false;
11466   }
11467   explicit LastprivateConditionalRefChecker(
11468       CodeGenFunction &CGF,
11469       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11470       : CGF(CGF), LPM(LPM) {}
11471   std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
11472   getFoundData() const {
11473     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
11474                            UseOriginalIV);
11475   }
11476 };
11477 } // namespace
11478 
// Emits the bookkeeping for a conditional lastprivate update: whenever an
// lvalue naming a lastprivate conditional variable is written, compare the
// current (global) loop IV with the IV recorded at the previous update and,
// if not older, store both the IV and the new value into internal globals.
// The final value is later copied back by
// emitLastprivateConditionalFinalUpdate().
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // The 'conditional' lastprivate modifier is an OpenMP 5.0 feature.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  bool UseOriginalIV;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
      Checker.getFoundData();

  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv
  if (!UseOriginalIV) {
    Address IVAddr =
        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
  }
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal =
        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
    // (last_iv <= global_iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = global_iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal =
          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Updates must be mutually exclusive across threads; guard them with a
    // critical region named after the variable.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
  }
}
11586 
11587 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11588     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11589     SourceLocation Loc) {
11590   if (CGF.getLangOpts().OpenMP < 50)
11591     return;
11592   auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
11593   assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
11594          "Unknown lastprivate conditional variable.");
11595   StringRef UniqueName = It->getSecond();
11596   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11597   // The variable was not updated in the region - exit.
11598   if (!GV)
11599     return;
11600   LValue LPLVal = CGF.MakeAddrLValue(
11601       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11602   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11603   CGF.EmitStoreOfScalar(Res, PrivLVal);
11604 }
11605 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides.  In SIMD-only mode no calls into the OpenMP
// runtime library are generated, so the entry points below must never be
// invoked; each traps with llvm_unreachable.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11625 
// Parallel-region and synchronization entry points - unreachable stubs in
// SIMD-only mode (no runtime calls are ever emitted).
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11680 
// Worksharing-loop ('for'/'distribute' schedule) runtime hooks - unreachable
// stubs in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11732 
// Threadprivate and flush support - unreachable stubs in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11756 
// Tasking entry points - unreachable stubs in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11772 
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only the "simple" reduction form (no runtime calls) is valid in SIMD-only
  // mode; delegate its emission to the base implementation.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11781 
// Task reductions, taskwait and cancellation - unreachable stubs in SIMD-only
// mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11818 
// Target (offloading) entry points - unreachable stubs in SIMD-only mode,
// except emitTargetGlobal which declines to handle the global.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Never claim the global: returning false lets the caller emit it along the
  // normal host code generation path.
  return false;
}
11847 
// Teams, target data, doacross and parameter-translation hooks - unreachable
// stubs in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11898