//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
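  // Illustrative sketch only (not emitted verbatim): for an untied task body
  // with one extra scheduling point, the action above yields roughly
  //
  //   switch (*part_id) {          // emitted by Enter()
  //   case 0: goto .untied.jmp.0;
  //   case 1: goto .untied.jmp.1;
  //   default: goto .untied.done.; // returns through the cleanup path
  //   }
  //   .untied.jmp.0:
  //     ...first part of the task body...
  //     *part_id = 1;              // emitted by emitUntiedSwitch()
  //     <re-enqueue the task>; return;
  //   .untied.jmp.1:
  //     ...rest of the task body...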
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
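
// A typical use of the RAII above at a call site looks roughly like the
// following sketch ('Body' is a placeholder, not a name used in this file):
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined region info installed
//   } // destructor restores the previous CapturedStmtInfo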

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
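
// These are bitmask values. For example, the location flags for an implicit
// barrier at the end of a worksharing 'for' would typically be formed as
// (a sketch, not a quote from a call site):
//
//   unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR;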

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used if the device was not specified; the runtime should get
  /// it from environment variables, as described in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                                 */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
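
// For illustration only: the ident_t constant emitted for a construct in
// function 'foo' of 'file.c' spanning lines 4-6 would look roughly like
//
//   { i32 0, i32 2 /*OMP_IDENT_KMPC*/, i32 0, i32 0,
//     i8* ";file.c;foo;4;6;;" }
//
// with the flags field adjusted per construct.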

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
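
// For example, '#pragma omp for schedule(nonmonotonic: dynamic, 4)' is
// expected to select (a sketch of the mapping, not a quote from a call site):
//
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
//
// with the chunk size 4 passed separately to the dispatch-init runtime call.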

enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};
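
// As a rough illustration of how these entry points are used: a plain
// '#pragma omp parallel' region is outlined into a microtask and lowered to
// approximately
//
//   __kmpc_fork_call(&loc, /*argc=*/N, .omp_outlined., <captured args...>);
//
// while a parallel region with a false 'if' clause is instead bracketed by
// __kmpc_serialized_parallel/__kmpc_end_serialized_parallel around a direct
// call of the outlined function.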

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
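
// Note that because the action is registered as a NormalAndEHCleanup,
// PrePostActionTy::Exit runs even if the callback unwinds. A minimal usage
// sketch:
//
//   RegionCodeGenTy RCG(CodeGen);
//   RCG.setAction(Action); // optional Enter/Exit bracketing
//   RCG(CGF);              // runs the callback inside a RunCleanupsScope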

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
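
// Schematically, the routine above emits the following initialization loop
// (simplified; the source-element PHI exists only for declare-reduction
// initializers):
//
//   dest = &Dest[0]; end = dest + NumElements;
//   if (dest == end) goto done;
//   body: init(*dest /*, *src*/); ++dest /*, ++src*/;
//         if (dest != end) goto body;
//   done: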

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
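
// Illustrative sketch of castToBase for BaseTy 'T **' reduced through an
// element of type 'T' (types chosen here purely for exposition): two
// temporaries are chained so that dereferencing the returned address level by
// level ends at the adjusted pointer:
//
//   T *inner = (T *)Addr;  // innermost CreateMemTemp
//   T **outer = &inner;    // MostTopTmp, what the caller receives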

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
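
// For example (a sketch): for 'reduction(+: a[1:2])' the shared lvalue points
// at &a[1], while subscripts in the outlined code are still written against
// the base 'a'. The code above computes the (negative) distance from &a[1]
// back to the array base and applies it to the private copy, so that indexing
// the adjusted private base with the original subscripts lands in the private
// storage.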
1199
usesReductionInitializer(unsigned N) const1200 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1201 const OMPDeclareReductionDecl *DRD =
1202 getReductionInit(ClausesData[N].ReductionOp);
1203 return DRD && DRD->getInitializer();
1204 }
1205
getThreadIDVariableLValue(CodeGenFunction & CGF)1206 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1207 return CGF.EmitLoadOfPointerLValue(
1208 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1209 getThreadIDVariable()->getType()->castAs<PointerType>());
1210 }
1211
EmitBody(CodeGenFunction & CGF,const Stmt *)1212 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1213 if (!CGF.HaveInsertPoint())
1214 return;
1215 // 1.2.2 OpenMP Language Terminology
1216 // Structured block - An executable statement with a single entry at the
1217 // top and a single exit at the bottom.
1218 // The point of exit cannot be a branch out of the structured block.
1219 // longjmp() and throw() must not violate the entry/exit criteria.
1220 CGF.EHStack.pushTerminate();
1221 CodeGen(CGF);
1222 CGF.EHStack.popTerminate();
1223 }
1224
getThreadIDVariableLValue(CodeGenFunction & CGF)1225 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1226 CodeGenFunction &CGF) {
1227 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1228 getThreadIDVariable()->getType(),
1229 AlignmentSource::Decl);
1230 }
1231
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1232 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1233 QualType FieldTy) {
1234 auto *Field = FieldDecl::Create(
1235 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1236 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1237 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1238 Field->setAccess(AS_public);
1239 DC->addDecl(Field);
1240 return Field;
1241 }
1242
CGOpenMPRuntime(CodeGenModule & CGM,StringRef FirstSeparator,StringRef Separator)1243 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1244 StringRef Separator)
1245 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1246 OffloadEntriesInfoManager(CGM) {
1247 ASTContext &C = CGM.getContext();
1248 RecordDecl *RD = C.buildImplicitRecord("ident_t");
1249 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1250 RD->startDefinition();
1251 // reserved_1
1252 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253 // flags
1254 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1255 // reserved_2
1256 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257 // reserved_3
1258 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1259 // psource
1260 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1261 RD->completeDefinition();
1262 IdentQTy = C.getRecordType(RD);
1263 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1264 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1265
1266 loadOffloadInfoMetadata();
1267 }

bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // Create a reference to the named value. This ensures that it is emitted
    // if it is a deferred decl.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
  // Emit aliases for the deferred aliasees.
  for (const auto &Pair : DeferredVariantFunction) {
    StringRef MangledName = CGM.getMangledName(Pair.second.second);
    llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
    // If the alias cannot be emitted, just emit the original declaration.
    (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
                                /*IsForDefinition=*/false);
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}
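
// For example (illustrative separators; the actual values depend on how this
// runtime instance was constructed), with FirstSeparator "." and Separator
// "_", getName({"omp", "reduction", "id"}) produces ".omp_reduction_id".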

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
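
// As an illustration, a user-defined reduction such as
//   #pragma omp declare reduction(merge : MyTy : omp_out.append(omp_in)) \
//       initializer(omp_priv = MyTy())
// is lowered to an ".omp_combiner." and an ".omp_initializer." function of
// the shape emitted above ("merge", "MyTy", and "append" are hypothetical
// names used only for this sketch).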

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will no longer be a need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
  PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill any padding slots between fields with null constants.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}
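
// For example (illustrative), if a record is lowered to {i32, [4 x i8], i8*}
// with its two fields at LLVM indices 0 and 2, buildStructValue emits a
// zeroed [4 x i8] for the padding slot at index 1 between the two constants.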

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize the default value of the psource field of all default
      // ident_t objects. The format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated, return the global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have a null DebugLoc and a non-null ThreadID if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build the debug location string.
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
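    // For example (illustrative), a construct at test.c:4:7 inside foo()
    // yields the string ";test.c;foo;4;7;;".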
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as
      // an argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
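
// Illustratively, an outlined parallel region body has the microtask shape
// (a sketch; ".omp_outlined." is the conventional helper name):
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       /* captured variables */ ...);
// which matches the variadic kmpc_micro type built above.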

llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::FunctionCallee RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_call:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the __kmpc_fork_call are passed to the
        //    callback callee.
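        // The resulting IR is roughly (an illustrative sketch):
        //   declare !callback !0 void @__kmpc_fork_call(%struct.ident_t*,
        //       i32, void (i32*, i32*, ...)*, ...)
        //   !0 = !{!1}
        //   !1 = !{i64 2, i64 -1, i64 -1, i1 true}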
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                        2, {-1, -1},
                                        /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_critical_with_hint: {
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit, uintptr_t hint);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
                                CGM.IntPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto *KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)
            ->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto *KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                         /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_barrier: {
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  case OMPRTL__kmpc_omp_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    // Return void * and then cast to particular kmp_task_t type.
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_target_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32
    // gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
                                CGM.Int64Ty};
    // Return void * and then cast to particular kmp_task_t type.
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_target_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_task: {
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    break;
  }
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty,           CGM.VoidPtrTy,
                                CGM.Int32Ty,           CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
    break;
  }
  case OMPRTL__kmpc_fork_teams: {
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_teams:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the __kmpc_fork_teams are passed to the
        //    callback callee.
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                        2, {-1, -1},
                                        /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  case OMPRTL__kmpc_doacross_init: {
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
    // num_dims, struct kmp_dim *dims);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
    break;
  }
  case OMPRTL__kmpc_doacross_fini: {
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
    break;
  }
  case OMPRTL__kmpc_doacross_post: {
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
    break;
  }
  case OMPRTL__kmpc_doacross_wait: {
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
    break;
  }
  case OMPRTL__kmpc_task_reduction_init: {
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
    // *data);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
    break;
  }
  case OMPRTL__kmpc_task_reduction_get_th_data: {
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
    // *d);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
    break;
  }
  case OMPRTL__kmpc_alloc: {
    // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
    break;
  }
  case OMPRTL__kmpc_free: {
    // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
    break;
  }
  case OMPRTL__kmpc_push_target_tripcount: {
    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
    // size);
    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_nowait: {
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_target_teams_nowait: {
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
    // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
    break;
  }
  case OMPRTL__tgt_register_requires: {
    // Build void __tgt_register_requires(int64_t flags);
    llvm::Type *TypeParams[] = {CGM.Int64Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
    break;
  }
  case OMPRTL__tgt_target_data_begin: {
    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
    break;
  }
  case OMPRTL__tgt_target_data_begin_nowait: {
    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_end: {
    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
    break;
  }
  case OMPRTL__tgt_target_data_end_nowait: {
    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_update: {
    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
    break;
  }
  case OMPRTL__tgt_target_data_update_nowait: {
    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
    break;
  }
  case OMPRTL__tgt_mapper_num_components: {
    // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
    break;
  }
  case OMPRTL__tgt_push_mapper_component: {
    // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
    // *base, void *begin, int64_t size, int64_t type);
    llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
                                CGM.Int64Ty, CGM.Int64Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
    break;
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}
2588
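/// Build the runtime entry for static worksharing-loop initialization. The
/// name encodes the induction variable's width and signedness
/// (__kmpc_for_static_init_{4,4u,8,8u}); e.g. a signed 32-bit IV selects
///   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
///                                 kmp_int32 schedtype, kmp_int32 *p_lastiter,
///                                 kmp_int32 *p_lower, kmp_int32 *p_upper,
///                                 kmp_int32 *p_stride, kmp_int32 incr,
///                                 kmp_int32 chunk);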
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

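/// Build the dynamic-scheduling init entry. As with the static variant, the
/// runtime name is selected from the IV width and signedness:
/// __kmpc_dispatch_init_{4,4u,8,8u}.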
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

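/// Build the dispatch-next entry (__kmpc_dispatch_next_{4,4u,8,8u}). It
/// returns a nonzero kmp_int32 while another chunk is available and fills in
/// the lastiter/lower/upper/stride out-parameters; emitForNext below converts
/// that result to a bool for the loop condition.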
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as the line number associated
/// with the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

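/// For 'declare target link' variables (and 'to' variables under unified
/// shared memory), return the address of an indirection pointer named
/// <mangled-name>[_<file-id>]_decl_tgt_ref_ptr through which the variable is
/// referenced; for all other variables return an invalid Address.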
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

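/// Return the address of the thread-local copy of VD. When the target supports
/// TLS and -fopenmp-use-tls is in effect, the original address is used as-is;
/// otherwise the copy is obtained from the runtime via
/// __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).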
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
      VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. This parameter is
    // reserved by the runtime, which currently requires it to be NULL and
    // fires an assertion otherwise.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

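/// Emit the ctor/dtor helpers for a 'declare target' variable definition and
/// register them as offload entries (OMPTargetRegionEntryCtor/Dtor). Every
/// path returns CGM.getLangOpts().OpenMPIsDevice, i.e. whether device codegen
/// is handling the definition instead of the regular host emission.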
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know does not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

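/// Emit an if/else dispatch over an OpenMP 'if' clause condition. When the
/// condition folds to a constant only the live arm is emitted; otherwise both
/// arms are emitted behind a conditional branch (omp_if.then / omp_if.else).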
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

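/// Emit the runtime calls for a 'parallel' region. The 'then' path forks:
///   __kmpc_fork_call(loc, <num-captured>, microtask, var1, ..., varn);
/// the 'else' path (an 'if' clause that evaluated to false) runs the outlined
/// function serially between __kmpc_serialized_parallel and
/// __kmpc_end_serialized_parallel, passing a zero bound thread id.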
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

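/// Return the module-level lock variable backing a named 'critical' region.
/// Because getOrCreateInternalVariable creates globals with common linkage,
/// identically named critical sections in different translation units resolve
/// to a single shared lock at link time.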
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

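/// Emit the helper the runtime invokes from __kmpc_copyprivate to broadcast
/// copyprivate values: void copy_func(void *LHSArg, void *RHSArg), where each
/// argument is a void*[n] array over the n copyprivate variables and element I
/// is copied using its corresponding assignment expression.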
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose a static schedule with chunk size 1.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

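/// Combine the schedule enumeration with the monotonic/nonmonotonic modifier
/// expected by the runtime; the modifier is OR'ed into the returned value,
/// while the 'simd' modifier instead upgrades a static chunked schedule to
/// the balanced-chunked variant. E.g., under OpenMP 5.0 schedule(dynamic)
/// with no explicit modifier encodes as
/// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.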
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

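/// Emit the actual __kmpc_for_static_init call once the schedule encoding and
/// runtime entry have been chosen. A missing chunk expression is only legal
/// for the non-chunked static schedules and defaults to 1.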
emitForStaticInitCall(CodeGenFunction & CGF,llvm::Value * UpdateLocation,llvm::Value * ThreadId,llvm::FunctionCallee ForStaticInitFunction,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2,const CGOpenMPRuntime::StaticRTInput & Values)3709 static void emitForStaticInitCall(
3710 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3711 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3712 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3713 const CGOpenMPRuntime::StaticRTInput &Values) {
3714 if (!CGF.HaveInsertPoint())
3715 return;
3716
3717 assert(!Values.Ordered);
3718 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3719 Schedule == OMP_sch_static_balanced_chunked ||
3720 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3721 Schedule == OMP_dist_sch_static ||
3722 Schedule == OMP_dist_sch_static_chunked);
3723
3724 // Call __kmpc_for_static_init(
3725 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3726 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3727 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3728 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3729 llvm::Value *Chunk = Values.Chunk;
3730 if (Chunk == nullptr) {
3731 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3732 Schedule == OMP_dist_sch_static) &&
3733 "expected static non-chunked schedule");
3734 // If the Chunk was not specified in the clause - use default value 1.
3735 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3736 } else {
3737 assert((Schedule == OMP_sch_static_chunked ||
3738 Schedule == OMP_sch_static_balanced_chunked ||
3739 Schedule == OMP_ord_static_chunked ||
3740 Schedule == OMP_dist_sch_static_chunked) &&
3741 "expected static chunked schedule");
3742 }
3743 llvm::Value *Args[] = {
3744 UpdateLocation,
3745 ThreadId,
3746 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3747 M2)), // Schedule type
3748 Values.IL.getPointer(), // &isLastIter
3749 Values.LB.getPointer(), // &LB
3750 Values.UB.getPointer(), // &UB
3751 Values.ST.getPointer(), // &Stride
3752 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3753 Chunk // Chunk
3754 };
3755 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3756 }
3757
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)3758 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3759 SourceLocation Loc,
3760 OpenMPDirectiveKind DKind,
3761 const OpenMPScheduleTy &ScheduleKind,
3762 const StaticRTInput &Values) {
3763 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3764 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3765 assert(isOpenMPWorksharingDirective(DKind) &&
3766 "Expected loop-based or sections-based directive.");
3767 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3768 isOpenMPLoopDirective(DKind)
3769 ? OMP_IDENT_WORK_LOOP
3770 : OMP_IDENT_WORK_SECTIONS);
3771 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3772 llvm::FunctionCallee StaticInitFunction =
3773 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3774 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3775 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3776 }
3777
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const CGOpenMPRuntime::StaticRTInput & Values)3778 void CGOpenMPRuntime::emitDistributeStaticInit(
3779 CodeGenFunction &CGF, SourceLocation Loc,
3780 OpenMPDistScheduleClauseKind SchedKind,
3781 const CGOpenMPRuntime::StaticRTInput &Values) {
3782 OpenMPSchedType ScheduleNum =
3783 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3784 llvm::Value *UpdatedLocation =
3785 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3786 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3787 llvm::FunctionCallee StaticInitFunction =
3788 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3789 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3790 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3791 OMPC_SCHEDULE_MODIFIER_unknown, Values);
3792 }
3793
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)3794 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3795 SourceLocation Loc,
3796 OpenMPDirectiveKind DKind) {
3797 if (!CGF.HaveInsertPoint())
3798 return;
3799 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3800 llvm::Value *Args[] = {
3801 emitUpdateLocation(CGF, Loc,
3802 isOpenMPDistributeDirective(DKind)
3803 ? OMP_IDENT_WORK_DISTRIBUTE
3804 : isOpenMPLoopDirective(DKind)
3805 ? OMP_IDENT_WORK_LOOP
3806 : OMP_IDENT_WORK_SECTIONS),
3807 getThreadID(CGF, Loc)};
3808 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3809 Args);
3810 }
3811
3812 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3813 SourceLocation Loc,
3814 unsigned IVSize,
3815 bool IVSigned) {
3816 if (!CGF.HaveInsertPoint())
3817 return;
3818 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3819 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3820 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3821 }
3822
3823 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3824 SourceLocation Loc, unsigned IVSize,
3825 bool IVSigned, Address IL,
3826 Address LB, Address UB,
3827 Address ST) {
3828 // Call __kmpc_dispatch_next(
3829 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3830 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3831 // kmp_int[32|64] *p_stride);
3832 llvm::Value *Args[] = {
3833 emitUpdateLocation(CGF, Loc),
3834 getThreadID(CGF, Loc),
3835 IL.getPointer(), // &isLastIter
3836 LB.getPointer(), // &Lower
3837 UB.getPointer(), // &Upper
3838 ST.getPointer() // &Stride
3839 };
3840 llvm::Value *Call =
3841 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3842 return CGF.EmitScalarConversion(
3843 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3844 CGF.getContext().BoolTy, Loc);
3845 }
3846
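// Illustratively, the code generated around emitForNext consumes dynamically
// dispatched chunks by looping on the returned flag, roughly as below for a
// signed 32-bit IV (a sketch of the intended shape, not literal emitted IR):
// \code
// kmp_int32 last, lb, ub, st;
// while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     loop_body(i);
// }
// \endcode
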
3847 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3848 llvm::Value *NumThreads,
3849 SourceLocation Loc) {
3850 if (!CGF.HaveInsertPoint())
3851 return;
3852 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3853 llvm::Value *Args[] = {
3854 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3855 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3856 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3857 Args);
3858 }
3859
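// For example, a directive such as
// \code
// #pragma omp parallel num_threads(N)
// \endcode
// is lowered so that __kmpc_push_num_threads(&loc, gtid, N) executes right
// before the corresponding fork, limiting the team size of the next parallel
// region only (a sketch of the runtime contract, not literal emitted IR).
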
3860 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3861 ProcBindKind ProcBind,
3862 SourceLocation Loc) {
3863 if (!CGF.HaveInsertPoint())
3864 return;
3865 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3866 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3867 llvm::Value *Args[] = {
3868 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3869 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3870 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3871 }
3872
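// Analogously to num_threads above, a clause such as
// \code
// #pragma omp parallel proc_bind(close)
// \endcode
// produces a __kmpc_push_proc_bind call that passes the numeric ProcBindKind
// value ahead of the fork (again a sketch; see kmp.h for the authoritative
// prototype).
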
3873 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3874 SourceLocation Loc) {
3875 if (!CGF.HaveInsertPoint())
3876 return;
3877 // Build call void __kmpc_flush(ident_t *loc)
3878 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3879 emitUpdateLocation(CGF, Loc));
3880 }
3881
3882 namespace {
3883 /// Indexes of fields for type kmp_task_t.
3884 enum KmpTaskTFields {
3885 /// List of shared variables.
3886 KmpTaskTShareds,
3887 /// Task routine.
3888 KmpTaskTRoutine,
3889 /// Partition id for the untied tasks.
3890 KmpTaskTPartId,
3891 /// Function with call of destructors for private variables.
3892 Data1,
3893 /// Task priority.
3894 Data2,
3895 /// (Taskloops only) Lower bound.
3896 KmpTaskTLowerBound,
3897 /// (Taskloops only) Upper bound.
3898 KmpTaskTUpperBound,
3899 /// (Taskloops only) Stride.
3900 KmpTaskTStride,
3901 /// (Taskloops only) Is last iteration flag.
3902 KmpTaskTLastIter,
3903 /// (Taskloops only) Reduction data.
3904 KmpTaskTReductions,
3905 };
3906 } // anonymous namespace
3907
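// For orientation, these indexes correspond to the following layout, which
// createKmpTaskTRecordDecl below materializes as an implicit record (a
// sketch; Data1/Data2 are kmp_cmplrdata_t unions holding the destructor
// thunk and the task priority, respectively):
// \code
// struct kmp_task_t {
//   void *shareds;               // KmpTaskTShareds
//   kmp_routine_entry_t routine; // KmpTaskTRoutine
//   kmp_int32 part_id;           // KmpTaskTPartId
//   kmp_cmplrdata_t data1;       // Data1 (destructors)
//   kmp_cmplrdata_t data2;       // Data2 (priority)
//   // Taskloop-only fields follow: lb, ub, st, liter, reductions.
// };
// \endcode
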
3908 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3909 return OffloadEntriesTargetRegion.empty() &&
3910 OffloadEntriesDeviceGlobalVar.empty();
3911 }
3912
3913 /// Initialize target region entry.
3914 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3915 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3916 StringRef ParentName, unsigned LineNum,
3917 unsigned Order) {
3918 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3919 "only required for the device "
3920 "code generation.");
3921 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3922 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3923 OMPTargetRegionEntryTargetRegion);
3924 ++OffloadingEntriesNum;
3925 }
3926
3927 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3928 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3929 StringRef ParentName, unsigned LineNum,
3930 llvm::Constant *Addr, llvm::Constant *ID,
3931 OMPTargetRegionEntryKind Flags) {
3932 // If we are emitting code for a target, the entry is already initialized;
3933 // it only has to be registered.
3934 if (CGM.getLangOpts().OpenMPIsDevice) {
3935 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3936 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3937 DiagnosticsEngine::Error,
3938 "Unable to find target region on line '%0' in the device code.");
3939 CGM.getDiags().Report(DiagID) << LineNum;
3940 return;
3941 }
3942 auto &Entry =
3943 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3944 assert(Entry.isValid() && "Entry not initialized!");
3945 Entry.setAddress(Addr);
3946 Entry.setID(ID);
3947 Entry.setFlags(Flags);
3948 } else {
3949 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3950 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3951 ++OffloadingEntriesNum;
3952 }
3953 }
3954
3955 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3956 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3957 unsigned LineNum) const {
3958 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3959 if (PerDevice == OffloadEntriesTargetRegion.end())
3960 return false;
3961 auto PerFile = PerDevice->second.find(FileID);
3962 if (PerFile == PerDevice->second.end())
3963 return false;
3964 auto PerParentName = PerFile->second.find(ParentName);
3965 if (PerParentName == PerFile->second.end())
3966 return false;
3967 auto PerLine = PerParentName->second.find(LineNum);
3968 if (PerLine == PerParentName->second.end())
3969 return false;
3970 // Fail if this entry is already registered.
3971 if (PerLine->second.getAddress() || PerLine->second.getID())
3972 return false;
3973 return true;
3974 }
3975
3976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3977 const OffloadTargetRegionEntryInfoActTy &Action) {
3978 // Scan all target region entries and perform the provided action.
3979 for (const auto &D : OffloadEntriesTargetRegion)
3980 for (const auto &F : D.second)
3981 for (const auto &P : F.second)
3982 for (const auto &L : P.second)
3983 Action(D.first, F.first, P.first(), L.first, L.second);
3984 }
3985
3986 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3987 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3988 OMPTargetGlobalVarEntryKind Flags,
3989 unsigned Order) {
3990 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3991 "only required for the device "
3992 "code generation.");
3993 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3994 ++OffloadingEntriesNum;
3995 }
3996
3997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3998 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3999 CharUnits VarSize,
4000 OMPTargetGlobalVarEntryKind Flags,
4001 llvm::GlobalValue::LinkageTypes Linkage) {
4002 if (CGM.getLangOpts().OpenMPIsDevice) {
4003 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4004 assert(Entry.isValid() && Entry.getFlags() == Flags &&
4005 "Entry not initialized!");
4006 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4007 "Resetting with the new address.");
4008 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4009 if (Entry.getVarSize().isZero()) {
4010 Entry.setVarSize(VarSize);
4011 Entry.setLinkage(Linkage);
4012 }
4013 return;
4014 }
4015 Entry.setVarSize(VarSize);
4016 Entry.setLinkage(Linkage);
4017 Entry.setAddress(Addr);
4018 } else {
4019 if (hasDeviceGlobalVarEntryInfo(VarName)) {
4020 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4021 assert(Entry.isValid() && Entry.getFlags() == Flags &&
4022 "Entry not initialized!");
4023 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4024 "Resetting with the new address.");
4025 if (Entry.getVarSize().isZero()) {
4026 Entry.setVarSize(VarSize);
4027 Entry.setLinkage(Linkage);
4028 }
4029 return;
4030 }
4031 OffloadEntriesDeviceGlobalVar.try_emplace(
4032 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4033 ++OffloadingEntriesNum;
4034 }
4035 }
4036
4037 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4038 actOnDeviceGlobalVarEntriesInfo(
4039 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4040 // Scan all device global variable entries and perform the provided action.
4041 for (const auto &E : OffloadEntriesDeviceGlobalVar)
4042 Action(E.getKey(), E.getValue());
4043 }
4044
4045 void CGOpenMPRuntime::createOffloadEntry(
4046 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4047 llvm::GlobalValue::LinkageTypes Linkage) {
4048 StringRef Name = Addr->getName();
4049 llvm::Module &M = CGM.getModule();
4050 llvm::LLVMContext &C = M.getContext();
4051
4052 // Create constant string with the name.
4053 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4054
4055 std::string StringName = getName({"omp_offloading", "entry_name"});
4056 auto *Str = new llvm::GlobalVariable(
4057 M, StrPtrInit->getType(), /*isConstant=*/true,
4058 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4059 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4060
4061 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4062 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4063 llvm::ConstantInt::get(CGM.SizeTy, Size),
4064 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4065 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4066 std::string EntryName = getName({"omp_offloading", "entry", ""});
4067 llvm::GlobalVariable *Entry = createGlobalStruct(
4068 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4069 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4070
4071 // The entry has to be created in the section the linker expects it to be.
4072 Entry->setSection("omp_offloading_entries");
4073 }
4074
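// The net effect, sketched in LLVM IR with illustrative (not exactly
// mangled) names, is a name string plus a descriptor placed in the section
// the linker gathers into the offload entries table:
// \code
// @.omp_offloading.entry_name = internal unnamed_addr constant [4 x i8] c"foo\00"
// @.omp_offloading.entry.foo = weak constant %struct.__tgt_offload_entry {
//     i8* <ID>, i8* @.omp_offloading.entry_name, i64 <Size>, i32 <Flags>, i32 0
//   }, section "omp_offloading_entries"
// \endcode
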
4075 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4076 // Emit the offloading entries and metadata so that the device codegen side
4077 // can easily figure out what to emit. The produced metadata looks like
4078 // this:
4079 //
4080 // !omp_offload.info = !{!1, ...}
4081 //
4082 // Right now we only generate metadata for functions that contain target
4083 // regions.
4084
4085 // If we are in simd mode or there are no entries, we don't need to do
4086 // anything.
4087 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
4088 return;
4089
4090 llvm::Module &M = CGM.getModule();
4091 llvm::LLVMContext &C = M.getContext();
4092 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
4093 SourceLocation, StringRef>,
4094 16>
4095 OrderedEntries(OffloadEntriesInfoManager.size());
4096 llvm::SmallVector<StringRef, 16> ParentFunctions(
4097 OffloadEntriesInfoManager.size());
4098
4099 // Auxiliary methods to create metadata values and strings.
4100 auto &&GetMDInt = [this](unsigned V) {
4101 return llvm::ConstantAsMetadata::get(
4102 llvm::ConstantInt::get(CGM.Int32Ty, V));
4103 };
4104
4105 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4106
4107 // Create the offloading info metadata node.
4108 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4109
4110 // Create a function that emits metadata for each target region entry.
4111 auto &&TargetRegionMetadataEmitter =
4112 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
4113 &GetMDString](
4114 unsigned DeviceID, unsigned FileID, StringRef ParentName,
4115 unsigned Line,
4116 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4117 // Generate metadata for target regions. Each entry of this metadata
4118 // contains:
4119 // - Entry 0 -> Kind of this type of metadata (0).
4120 // - Entry 1 -> Device ID of the file where the entry was identified.
4121 // - Entry 2 -> File ID of the file where the entry was identified.
4122 // - Entry 3 -> Mangled name of the function where the entry was
4123 // identified.
4124 // - Entry 4 -> Line in the file where the entry was identified.
4125 // - Entry 5 -> Order the entry was created.
4126 // The first element of the metadata node is the kind.
4127 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4128 GetMDInt(FileID), GetMDString(ParentName),
4129 GetMDInt(Line), GetMDInt(E.getOrder())};
4130
4131 SourceLocation Loc;
4132 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
4133 E = CGM.getContext().getSourceManager().fileinfo_end();
4134 I != E; ++I) {
4135 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
4136 I->getFirst()->getUniqueID().getFile() == FileID) {
4137 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
4138 I->getFirst(), Line, 1);
4139 break;
4140 }
4141 }
4142 // Save this entry in the right position of the ordered entries array.
4143 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
4144 ParentFunctions[E.getOrder()] = ParentName;
4145
4146 // Add metadata to the named metadata node.
4147 MD->addOperand(llvm::MDNode::get(C, Ops));
4148 };
4149
4150 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4151 TargetRegionMetadataEmitter);
4152
4153 // Create a function that emits metadata for each device global variable entry.
4154 auto &&DeviceGlobalVarMetadataEmitter =
4155 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4156 MD](StringRef MangledName,
4157 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4158 &E) {
4159 // Generate metadata for global variables. Each entry of this metadata
4160 // contains:
4161 // - Entry 0 -> Kind of this type of metadata (1).
4162 // - Entry 1 -> Mangled name of the variable.
4163 // - Entry 2 -> Declare target kind.
4164 // - Entry 3 -> Order the entry was created.
4165 // The first element of the metadata node is the kind.
4166 llvm::Metadata *Ops[] = {
4167 GetMDInt(E.getKind()), GetMDString(MangledName),
4168 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4169
4170 // Save this entry in the right position of the ordered entries array.
4171 OrderedEntries[E.getOrder()] =
4172 std::make_tuple(&E, SourceLocation(), MangledName);
4173
4174 // Add metadata to the named metadata node.
4175 MD->addOperand(llvm::MDNode::get(C, Ops));
4176 };
4177
4178 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4179 DeviceGlobalVarMetadataEmitter);
4180
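// Put together, the named metadata produced by the two emitters above looks
// roughly like this (field values are placeholders):
// \code
// !omp_offload.info = !{!0, !1}
// ; Target region: kind 0, device ID, file ID, parent name, line, order.
// !0 = !{i32 0, i32 <devid>, i32 <fileid>, !"<parent>", i32 <line>, i32 0}
// ; Declare target variable: kind 1, mangled name, flags, order.
// !1 = !{i32 1, !"<var>", i32 <flags>, i32 1}
// \endcode
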
4181 for (const auto &E : OrderedEntries) {
4182 assert(std::get<0>(E) && "All ordered entries must exist!");
4183 if (const auto *CE =
4184 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4185 std::get<0>(E))) {
4186 if (!CE->getID() || !CE->getAddress()) {
4187 // Do not blame the entry if the parent function is not emitted.
4188 StringRef FnName = ParentFunctions[CE->getOrder()];
4189 if (!CGM.GetGlobalValue(FnName))
4190 continue;
4191 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4192 DiagnosticsEngine::Error,
4193 "Offloading entry for target region in %0 is incorrect: either the "
4194 "address or the ID is invalid.");
4195 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
4196 continue;
4197 }
4198 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4199 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4200 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
4201 OffloadEntryInfoDeviceGlobalVar>(
4202 std::get<0>(E))) {
4203 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4204 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4205 CE->getFlags());
4206 switch (Flags) {
4207 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4208 if (CGM.getLangOpts().OpenMPIsDevice &&
4209 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4210 continue;
4211 if (!CE->getAddress()) {
4212 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4213 DiagnosticsEngine::Error, "Offloading entry for declare target "
4214 "variable %0 is incorrect: the "
4215 "address is invalid.");
4216 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
4217 continue;
4218 }
4219 // The variable has no definition, so there is no need to add the entry.
4220 if (CE->getVarSize().isZero())
4221 continue;
4222 break;
4223 }
4224 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4225 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4226 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4227 "Declaret target link address is set.");
4228 if (CGM.getLangOpts().OpenMPIsDevice)
4229 continue;
4230 if (!CE->getAddress()) {
4231 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4232 DiagnosticsEngine::Error,
4233 "Offloading entry for declare target variable is incorrect: the "
4234 "address is invalid.");
4235 CGM.getDiags().Report(DiagID);
4236 continue;
4237 }
4238 break;
4239 }
4240 createOffloadEntry(CE->getAddress(), CE->getAddress(),
4241 CE->getVarSize().getQuantity(), Flags,
4242 CE->getLinkage());
4243 } else {
4244 llvm_unreachable("Unsupported entry kind.");
4245 }
4246 }
4247 }
4248
4249 /// Loads all the offload entries information from the host IR
4250 /// metadata.
4251 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4252 // If we are in target mode, load the metadata from the host IR. This code
4253 // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4254
4255 if (!CGM.getLangOpts().OpenMPIsDevice)
4256 return;
4257
4258 if (CGM.getLangOpts().OMPHostIRFile.empty())
4259 return;
4260
4261 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4262 if (auto EC = Buf.getError()) {
4263 CGM.getDiags().Report(diag::err_cannot_open_file)
4264 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4265 return;
4266 }
4267
4268 llvm::LLVMContext C;
4269 auto ME = expectedToErrorOrAndEmitErrors(
4270 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4271
4272 if (auto EC = ME.getError()) {
4273 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4274 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4275 CGM.getDiags().Report(DiagID)
4276 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4277 return;
4278 }
4279
4280 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4281 if (!MD)
4282 return;
4283
4284 for (llvm::MDNode *MN : MD->operands()) {
4285 auto &&GetMDInt = [MN](unsigned Idx) {
4286 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4287 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4288 };
4289
4290 auto &&GetMDString = [MN](unsigned Idx) {
4291 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4292 return V->getString();
4293 };
4294
4295 switch (GetMDInt(0)) {
4296 default:
4297 llvm_unreachable("Unexpected metadata!");
4298 break;
4299 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4300 OffloadingEntryInfoTargetRegion:
4301 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4302 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4303 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4304 /*Order=*/GetMDInt(5));
4305 break;
4306 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4307 OffloadingEntryInfoDeviceGlobalVar:
4308 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4309 /*MangledName=*/GetMDString(1),
4310 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4311 /*Flags=*/GetMDInt(2)),
4312 /*Order=*/GetMDInt(3));
4313 break;
4314 }
4315 }
4316 }
4317
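// In a typical two-pass offloading build, the host compilation runs first and
// emits the metadata above; each device compilation is then handed the host
// bitcode (the OMPHostIRFile language option, populated from the cc1 flag
// -fopenmp-host-ir-file-path) so that entry order and source positions match
// between the host and device images.
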
4318 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4319 if (!KmpRoutineEntryPtrTy) {
4320 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4321 ASTContext &C = CGM.getContext();
4322 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4323 FunctionProtoType::ExtProtoInfo EPI;
4324 KmpRoutineEntryPtrQTy = C.getPointerType(
4325 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4326 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4327 }
4328 }
4329
4330 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4331 // Make sure the type of the entry is already created. This is the type we
4332 // have to create:
4333 // struct __tgt_offload_entry{
4334 // void *addr; // Pointer to the offload entry info.
4335 // // (function or global)
4336 // char *name; // Name of the function or global.
4337 // size_t size; // Size of the entry info (0 if it is a function).
4338 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4339 // int32_t reserved; // Reserved, to use by the runtime library.
4340 // };
4341 if (TgtOffloadEntryQTy.isNull()) {
4342 ASTContext &C = CGM.getContext();
4343 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4344 RD->startDefinition();
4345 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4346 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4347 addFieldToRecordDecl(C, RD, C.getSizeType());
4348 addFieldToRecordDecl(
4349 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4350 addFieldToRecordDecl(
4351 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4352 RD->completeDefinition();
4353 RD->addAttr(PackedAttr::CreateImplicit(C));
4354 TgtOffloadEntryQTy = C.getRecordType(RD);
4355 }
4356 return TgtOffloadEntryQTy;
4357 }
4358
4359 namespace {
4360 struct PrivateHelpersTy {
4361 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4362 const VarDecl *PrivateElemInit)
4363 : Original(Original), PrivateCopy(PrivateCopy),
4364 PrivateElemInit(PrivateElemInit) {}
4365 const VarDecl *Original;
4366 const VarDecl *PrivateCopy;
4367 const VarDecl *PrivateElemInit;
4368 };
4369 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4370 } // anonymous namespace
4371
4372 static RecordDecl *
4373 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4374 if (!Privates.empty()) {
4375 ASTContext &C = CGM.getContext();
4376 // Build struct .kmp_privates_t. {
4377 // /* private vars */
4378 // };
4379 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4380 RD->startDefinition();
4381 for (const auto &Pair : Privates) {
4382 const VarDecl *VD = Pair.second.Original;
4383 QualType Type = VD->getType().getNonReferenceType();
4384 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4385 if (VD->hasAttrs()) {
4386 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4387 E(VD->getAttrs().end());
4388 I != E; ++I)
4389 FD->addAttr(*I);
4390 }
4391 }
4392 RD->completeDefinition();
4393 return RD;
4394 }
4395 return nullptr;
4396 }
4397
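// For instance, given a task with 'int a' and 'double b' as in
// \code
// #pragma omp task firstprivate(a) private(b)
// \endcode
// the helper above would produce roughly
// \code
// struct .kmp_privates.t {
//   double b; // higher alignment first
//   int a;
// };
// \endcode
// because emitTaskInit below stable-sorts the privates by decreasing
// alignment before building this record.
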
4398 static RecordDecl *
4399 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4400 QualType KmpInt32Ty,
4401 QualType KmpRoutineEntryPointerQTy) {
4402 ASTContext &C = CGM.getContext();
4403 // Build struct kmp_task_t {
4404 // void * shareds;
4405 // kmp_routine_entry_t routine;
4406 // kmp_int32 part_id;
4407 // kmp_cmplrdata_t data1;
4408 // kmp_cmplrdata_t data2;
4409 // For taskloops additional fields:
4410 // kmp_uint64 lb;
4411 // kmp_uint64 ub;
4412 // kmp_int64 st;
4413 // kmp_int32 liter;
4414 // void * reductions;
4415 // };
4416 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4417 UD->startDefinition();
4418 addFieldToRecordDecl(C, UD, KmpInt32Ty);
4419 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4420 UD->completeDefinition();
4421 QualType KmpCmplrdataTy = C.getRecordType(UD);
4422 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4423 RD->startDefinition();
4424 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4425 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4426 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4427 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4428 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4429 if (isOpenMPTaskLoopDirective(Kind)) {
4430 QualType KmpUInt64Ty =
4431 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4432 QualType KmpInt64Ty =
4433 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4434 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4435 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4436 addFieldToRecordDecl(C, RD, KmpInt64Ty);
4437 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4438 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4439 }
4440 RD->completeDefinition();
4441 return RD;
4442 }
4443
4444 static RecordDecl *
4445 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4446 ArrayRef<PrivateDataTy> Privates) {
4447 ASTContext &C = CGM.getContext();
4448 // Build struct kmp_task_t_with_privates {
4449 // kmp_task_t task_data;
4450 // .kmp_privates_t. privates;
4451 // };
4452 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4453 RD->startDefinition();
4454 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4455 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4456 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4457 RD->completeDefinition();
4458 return RD;
4459 }
4460
4461 /// Emit a proxy function which accepts kmp_task_t as the second
4462 /// argument.
4463 /// \code
4464 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4465 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4466 /// For taskloops:
4467 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4468 /// tt->reductions, tt->shareds);
4469 /// return 0;
4470 /// }
4471 /// \endcode
4472 static llvm::Function *
4473 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4474 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4475 QualType KmpTaskTWithPrivatesPtrQTy,
4476 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4477 QualType SharedsPtrTy, llvm::Function *TaskFunction,
4478 llvm::Value *TaskPrivatesMap) {
4479 ASTContext &C = CGM.getContext();
4480 FunctionArgList Args;
4481 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4482 ImplicitParamDecl::Other);
4483 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4484 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4485 ImplicitParamDecl::Other);
4486 Args.push_back(&GtidArg);
4487 Args.push_back(&TaskTypeArg);
4488 const auto &TaskEntryFnInfo =
4489 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4490 llvm::FunctionType *TaskEntryTy =
4491 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4492 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4493 auto *TaskEntry = llvm::Function::Create(
4494 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4495 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4496 TaskEntry->setDoesNotRecurse();
4497 CodeGenFunction CGF(CGM);
4498 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4499 Loc, Loc);
4500
4501 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4502 // tt,
4503 // For taskloops:
4504 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4505 // tt->task_data.shareds);
4506 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4507 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4508 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4509 CGF.GetAddrOfLocalVar(&TaskTypeArg),
4510 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4511 const auto *KmpTaskTWithPrivatesQTyRD =
4512 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4513 LValue Base =
4514 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4515 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4516 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4517 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4518 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
4519
4520 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4521 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4522 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4523 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4524 CGF.ConvertTypeForMem(SharedsPtrTy));
4525
4526 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4527 llvm::Value *PrivatesParam;
4528 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4529 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4530 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4531 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
4532 } else {
4533 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4534 }
4535
4536 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4537 TaskPrivatesMap,
4538 CGF.Builder
4539 .CreatePointerBitCastOrAddrSpaceCast(
4540 TDBase.getAddress(CGF), CGF.VoidPtrTy)
4541 .getPointer()};
4542 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4543 std::end(CommonArgs));
4544 if (isOpenMPTaskLoopDirective(Kind)) {
4545 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4546 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4547 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4548 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4549 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4550 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4551 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4552 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4553 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4554 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4555 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4556 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4557 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4558 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4559 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4560 CallArgs.push_back(LBParam);
4561 CallArgs.push_back(UBParam);
4562 CallArgs.push_back(StParam);
4563 CallArgs.push_back(LIParam);
4564 CallArgs.push_back(RParam);
4565 }
4566 CallArgs.push_back(SharedsParam);
4567
4568 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4569 CallArgs);
4570 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4571 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4572 CGF.FinishFunction();
4573 return TaskEntry;
4574 }
4575
4576 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4577 SourceLocation Loc,
4578 QualType KmpInt32Ty,
4579 QualType KmpTaskTWithPrivatesPtrQTy,
4580 QualType KmpTaskTWithPrivatesQTy) {
4581 ASTContext &C = CGM.getContext();
4582 FunctionArgList Args;
4583 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4584 ImplicitParamDecl::Other);
4585 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4586 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4587 ImplicitParamDecl::Other);
4588 Args.push_back(&GtidArg);
4589 Args.push_back(&TaskTypeArg);
4590 const auto &DestructorFnInfo =
4591 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4592 llvm::FunctionType *DestructorFnTy =
4593 CGM.getTypes().GetFunctionType(DestructorFnInfo);
4594 std::string Name =
4595 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4596 auto *DestructorFn =
4597 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4598 Name, &CGM.getModule());
4599 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4600 DestructorFnInfo);
4601 DestructorFn->setDoesNotRecurse();
4602 CodeGenFunction CGF(CGM);
4603 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4604 Args, Loc, Loc);
4605
4606 LValue Base = CGF.EmitLoadOfPointerLValue(
4607 CGF.GetAddrOfLocalVar(&TaskTypeArg),
4608 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4609 const auto *KmpTaskTWithPrivatesQTyRD =
4610 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4611 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4612 Base = CGF.EmitLValueForField(Base, *FI);
4613 for (const auto *Field :
4614 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4615 if (QualType::DestructionKind DtorKind =
4616 Field->getType().isDestructedType()) {
4617 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4618 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4619 }
4620 }
4621 CGF.FinishFunction();
4622 return DestructorFn;
4623 }
4624
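// Conceptually, the generated helper is equivalent to the following sketch
// (field names are illustrative):
// \code
// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                 kmp_task_t_with_privates *tt) {
//   tt->privates.priv1.~T1();
//   // ... one destructor call per non-trivially-destructible private ...
// }
// \endcode
// Only fields whose type reports a non-trivial destruction kind get a
// cleanup pushed, so trivially destructible privates cost nothing here.
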
4625 /// Emit a privates mapping function for correct handling of private and
4626 /// firstprivate variables.
4627 /// \code
4628 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4629 /// **noalias priv1,..., <tyn> **noalias privn) {
4630 /// *priv1 = &.privates.priv1;
4631 /// ...;
4632 /// *privn = &.privates.privn;
4633 /// }
4634 /// \endcode
4635 static llvm::Value *
4636 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4637 ArrayRef<const Expr *> PrivateVars,
4638 ArrayRef<const Expr *> FirstprivateVars,
4639 ArrayRef<const Expr *> LastprivateVars,
4640 QualType PrivatesQTy,
4641 ArrayRef<PrivateDataTy> Privates) {
4642 ASTContext &C = CGM.getContext();
4643 FunctionArgList Args;
4644 ImplicitParamDecl TaskPrivatesArg(
4645 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4646 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4647 ImplicitParamDecl::Other);
4648 Args.push_back(&TaskPrivatesArg);
4649 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4650 unsigned Counter = 1;
4651 for (const Expr *E : PrivateVars) {
4652 Args.push_back(ImplicitParamDecl::Create(
4653 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4654 C.getPointerType(C.getPointerType(E->getType()))
4655 .withConst()
4656 .withRestrict(),
4657 ImplicitParamDecl::Other));
4658 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4659 PrivateVarsPos[VD] = Counter;
4660 ++Counter;
4661 }
4662 for (const Expr *E : FirstprivateVars) {
4663 Args.push_back(ImplicitParamDecl::Create(
4664 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4665 C.getPointerType(C.getPointerType(E->getType()))
4666 .withConst()
4667 .withRestrict(),
4668 ImplicitParamDecl::Other));
4669 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4670 PrivateVarsPos[VD] = Counter;
4671 ++Counter;
4672 }
4673 for (const Expr *E : LastprivateVars) {
4674 Args.push_back(ImplicitParamDecl::Create(
4675 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4676 C.getPointerType(C.getPointerType(E->getType()))
4677 .withConst()
4678 .withRestrict(),
4679 ImplicitParamDecl::Other));
4680 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681 PrivateVarsPos[VD] = Counter;
4682 ++Counter;
4683 }
4684 const auto &TaskPrivatesMapFnInfo =
4685 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4686 llvm::FunctionType *TaskPrivatesMapTy =
4687 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4688 std::string Name =
4689 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4690 auto *TaskPrivatesMap = llvm::Function::Create(
4691 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4692 &CGM.getModule());
4693 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4694 TaskPrivatesMapFnInfo);
4695 if (CGM.getLangOpts().Optimize) {
4696 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4697 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4698 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4699 }
4700 CodeGenFunction CGF(CGM);
4701 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4702 TaskPrivatesMapFnInfo, Args, Loc, Loc);
4703
4704 // *privi = &.privates.privi;
4705 LValue Base = CGF.EmitLoadOfPointerLValue(
4706 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4707 TaskPrivatesArg.getType()->castAs<PointerType>());
4708 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4709 Counter = 0;
4710 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4711 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4712 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4713 LValue RefLVal =
4714 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4715 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4716 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4717 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4718 ++Counter;
4719 }
4720 CGF.FinishFunction();
4721 return TaskPrivatesMap;
4722 }
4723
4724 /// Emit initialization for private variables in task-based directives.
4725 static void emitPrivatesInit(CodeGenFunction &CGF,
4726 const OMPExecutableDirective &D,
4727 Address KmpTaskSharedsPtr, LValue TDBase,
4728 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4729 QualType SharedsTy, QualType SharedsPtrTy,
4730 const OMPTaskDataTy &Data,
4731 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4732 ASTContext &C = CGF.getContext();
4733 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4734 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4735 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4736 ? OMPD_taskloop
4737 : OMPD_task;
4738 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4739 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4740 LValue SrcBase;
4741 bool IsTargetTask =
4742 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4743 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4744 // For target-based directives, skip the 3 firstprivate arrays
4745 // BasePointersArray, PointersArray and SizesArray. The original variables
4746 // for these arrays are not captured, and we get their addresses explicitly.
4747 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4748 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4749 SrcBase = CGF.MakeAddrLValue(
4750 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4751 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4752 SharedsTy);
4753 }
4754 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4755 for (const PrivateDataTy &Pair : Privates) {
4756 const VarDecl *VD = Pair.second.PrivateCopy;
4757 const Expr *Init = VD->getAnyInitializer();
4758 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4759 !CGF.isTrivialInitializer(Init)))) {
4760 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4761 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4762 const VarDecl *OriginalVD = Pair.second.Original;
4763 // Check if the variable is the target-based BasePointersArray,
4764 // PointersArray or SizesArray.
4765 LValue SharedRefLValue;
4766 QualType Type = PrivateLValue.getType();
4767 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4768 if (IsTargetTask && !SharedField) {
4769 assert(isa<ImplicitParamDecl>(OriginalVD) &&
4770 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4771 cast<CapturedDecl>(OriginalVD->getDeclContext())
4772 ->getNumParams() == 0 &&
4773 isa<TranslationUnitDecl>(
4774 cast<CapturedDecl>(OriginalVD->getDeclContext())
4775 ->getDeclContext()) &&
4776 "Expected artificial target data variable.");
4777 SharedRefLValue =
4778 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4779 } else {
4780 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4781 SharedRefLValue = CGF.MakeAddrLValue(
4782 Address(SharedRefLValue.getPointer(CGF),
4783 C.getDeclAlign(OriginalVD)),
4784 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4785 SharedRefLValue.getTBAAInfo());
4786 }
4787 if (Type->isArrayType()) {
4788 // Initialize firstprivate array.
4789 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4790 // Perform simple memcpy.
4791 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4792 } else {
4793 // Initialize firstprivate array using element-by-element
4794 // initialization.
4795 CGF.EmitOMPAggregateAssign(
4796 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
4797 Type,
4798 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4799 Address SrcElement) {
4800 // Clean up any temporaries needed by the initialization.
4801 CodeGenFunction::OMPPrivateScope InitScope(CGF);
4802 InitScope.addPrivate(
4803 Elem, [SrcElement]() -> Address { return SrcElement; });
4804 (void)InitScope.Privatize();
4805 // Emit initialization for single element.
4806 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4807 CGF, &CapturesInfo);
4808 CGF.EmitAnyExprToMem(Init, DestElement,
4809 Init->getType().getQualifiers(),
4810 /*IsInitializer=*/false);
4811 });
4812 }
4813 } else {
4814 CodeGenFunction::OMPPrivateScope InitScope(CGF);
4815 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
4816 return SharedRefLValue.getAddress(CGF);
4817 });
4818 (void)InitScope.Privatize();
4819 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4820 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4821 /*capturedByInit=*/false);
4822 }
4823 } else {
4824 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4825 }
4826 }
4827 ++FI;
4828 }
4829 }
4830
4831 /// Check if duplication function is required for taskloops.
4832 static bool checkInitIsRequired(CodeGenFunction &CGF,
4833 ArrayRef<PrivateDataTy> Privates) {
4834 bool InitRequired = false;
4835 for (const PrivateDataTy &Pair : Privates) {
4836 const VarDecl *VD = Pair.second.PrivateCopy;
4837 const Expr *Init = VD->getAnyInitializer();
4838 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4839 !CGF.isTrivialInitializer(Init));
4840 if (InitRequired)
4841 break;
4842 }
4843 return InitRequired;
4844 }
4845
4846
4847 /// Emit task_dup function (for initialization of
4848 /// private/firstprivate/lastprivate vars and last_iter flag)
4849 /// \code
4850 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4851 /// lastpriv) {
4852 /// // setup lastprivate flag
4853 /// task_dst->last = lastpriv;
4854 /// // could be constructor calls here...
4855 /// }
4856 /// \endcode
4857 static llvm::Value *
4858 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4859 const OMPExecutableDirective &D,
4860 QualType KmpTaskTWithPrivatesPtrQTy,
4861 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4862 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4863 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4864 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4865 ASTContext &C = CGM.getContext();
4866 FunctionArgList Args;
4867 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4868 KmpTaskTWithPrivatesPtrQTy,
4869 ImplicitParamDecl::Other);
4870 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4871 KmpTaskTWithPrivatesPtrQTy,
4872 ImplicitParamDecl::Other);
4873 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4874 ImplicitParamDecl::Other);
4875 Args.push_back(&DstArg);
4876 Args.push_back(&SrcArg);
4877 Args.push_back(&LastprivArg);
4878 const auto &TaskDupFnInfo =
4879 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4880 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4881 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4882 auto *TaskDup = llvm::Function::Create(
4883 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4884 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4885 TaskDup->setDoesNotRecurse();
4886 CodeGenFunction CGF(CGM);
4887 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4888 Loc);
4889
4890 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4891 CGF.GetAddrOfLocalVar(&DstArg),
4892 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4893 // task_dst->liter = lastpriv;
4894 if (WithLastIter) {
4895 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4896 LValue Base = CGF.EmitLValueForField(
4897 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4898 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4899 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4900 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4901 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4902 }
4903
4904 // Emit initial values for private copies (if any).
4905 assert(!Privates.empty());
4906 Address KmpTaskSharedsPtr = Address::invalid();
4907 if (!Data.FirstprivateVars.empty()) {
4908 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4909 CGF.GetAddrOfLocalVar(&SrcArg),
4910 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4911 LValue Base = CGF.EmitLValueForField(
4912 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4913 KmpTaskSharedsPtr = Address(
4914 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4915 Base, *std::next(KmpTaskTQTyRD->field_begin(),
4916 KmpTaskTShareds)),
4917 Loc),
4918 CGF.getNaturalTypeAlignment(SharedsTy));
4919 }
4920 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4921 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4922 CGF.FinishFunction();
4923 return TaskDup;
4924 }
4925
4926 /// Checks if destructor function is required to be generated.
4927 /// \return true if cleanups are required, false otherwise.
4928 static bool
4929 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4930 bool NeedsCleanup = false;
4931 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4932 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4933 for (const FieldDecl *FD : PrivateRD->fields()) {
4934 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4935 if (NeedsCleanup)
4936 break;
4937 }
4938 return NeedsCleanup;
4939 }
4940
4941 CGOpenMPRuntime::TaskResultTy
4942 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4943 const OMPExecutableDirective &D,
4944 llvm::Function *TaskFunction, QualType SharedsTy,
4945 Address Shareds, const OMPTaskDataTy &Data) {
4946 ASTContext &C = CGM.getContext();
4947 llvm::SmallVector<PrivateDataTy, 4> Privates;
4948 // Aggregate privates and sort them by alignment.
4949 auto I = Data.PrivateCopies.begin();
4950 for (const Expr *E : Data.PrivateVars) {
4951 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4952 Privates.emplace_back(
4953 C.getDeclAlign(VD),
4954 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4955 /*PrivateElemInit=*/nullptr));
4956 ++I;
4957 }
4958 I = Data.FirstprivateCopies.begin();
4959 auto IElemInitRef = Data.FirstprivateInits.begin();
4960 for (const Expr *E : Data.FirstprivateVars) {
4961 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4962 Privates.emplace_back(
4963 C.getDeclAlign(VD),
4964 PrivateHelpersTy(
4965 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4966 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4967 ++I;
4968 ++IElemInitRef;
4969 }
4970 I = Data.LastprivateCopies.begin();
4971 for (const Expr *E : Data.LastprivateVars) {
4972 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4973 Privates.emplace_back(
4974 C.getDeclAlign(VD),
4975 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4976 /*PrivateElemInit=*/nullptr));
4977 ++I;
4978 }
4979 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4980 return L.first > R.first;
4981 });
4982 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4983 // Build type kmp_routine_entry_t (if not built yet).
4984 emitKmpRoutineEntryT(KmpInt32Ty);
4985 // Build type kmp_task_t (if not built yet).
4986 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4987 if (SavedKmpTaskloopTQTy.isNull()) {
4988 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4989 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4990 }
4991 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4992 } else {
4993 assert((D.getDirectiveKind() == OMPD_task ||
4994 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4995 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4996 "Expected taskloop, task or target directive");
4997 if (SavedKmpTaskTQTy.isNull()) {
4998 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4999 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5000 }
5001 KmpTaskTQTy = SavedKmpTaskTQTy;
5002 }
5003 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5004 // Build particular struct kmp_task_t for the given task.
5005 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5006 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5007 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5008 QualType KmpTaskTWithPrivatesPtrQTy =
5009 C.getPointerType(KmpTaskTWithPrivatesQTy);
5010 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5011 llvm::Type *KmpTaskTWithPrivatesPtrTy =
5012 KmpTaskTWithPrivatesTy->getPointerTo();
5013 llvm::Value *KmpTaskTWithPrivatesTySize =
5014 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5015 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5016
5017 // Emit initial values for private copies (if any).
5018 llvm::Value *TaskPrivatesMap = nullptr;
5019 llvm::Type *TaskPrivatesMapTy =
5020 std::next(TaskFunction->arg_begin(), 3)->getType();
5021 if (!Privates.empty()) {
5022 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5023 TaskPrivatesMap = emitTaskPrivateMappingFunction(
5024 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5025 FI->getType(), Privates);
5026 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5027 TaskPrivatesMap, TaskPrivatesMapTy);
5028 } else {
5029 TaskPrivatesMap = llvm::ConstantPointerNull::get(
5030 cast<llvm::PointerType>(TaskPrivatesMapTy));
5031 }
5032 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5033 // kmp_task_t *tt);
5034 llvm::Function *TaskEntry = emitProxyTaskFunction(
5035 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5036 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5037 TaskPrivatesMap);
5038
5039 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5040 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5041 // kmp_routine_entry_t *task_entry);
5042 // Task flags. Format is taken from
5043 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5044 // description of kmp_tasking_flags struct.
5045 enum {
5046 TiedFlag = 0x1,
5047 FinalFlag = 0x2,
5048 DestructorsFlag = 0x8,
5049 PriorityFlag = 0x20
5050 };
5051 unsigned Flags = Data.Tied ? TiedFlag : 0;
5052 bool NeedsCleanup = false;
5053 if (!Privates.empty()) {
5054 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5055 if (NeedsCleanup)
5056 Flags = Flags | DestructorsFlag;
5057 }
5058 if (Data.Priority.getInt())
5059 Flags = Flags | PriorityFlag;
5060 llvm::Value *TaskFlags =
5061 Data.Final.getPointer()
5062 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5063 CGF.Builder.getInt32(FinalFlag),
5064 CGF.Builder.getInt32(/*C=*/0))
5065 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5066 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5067 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5068 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5069 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5070 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5071 TaskEntry, KmpRoutineEntryPtrTy)};
5072 llvm::Value *NewTask;
5073 if (D.hasClausesOfKind<OMPNowaitClause>()) {
5074 // Check if we have any device clause associated with the directive.
5075 const Expr *Device = nullptr;
5076 if (auto *C = D.getSingleClause<OMPDeviceClause>())
5077 Device = C->getDevice();
5078 // Emit the device ID if any, otherwise use the default value.
5079 llvm::Value *DeviceID;
5080 if (Device)
5081 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5082 CGF.Int64Ty, /*isSigned=*/true);
5083 else
5084 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5085 AllocArgs.push_back(DeviceID);
5086 NewTask = CGF.EmitRuntimeCall(
5087 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5088 } else {
5089 NewTask = CGF.EmitRuntimeCall(
5090 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5091 }
5092 llvm::Value *NewTaskNewTaskTTy =
5093 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5094 NewTask, KmpTaskTWithPrivatesPtrTy);
5095 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5096 KmpTaskTWithPrivatesQTy);
5097 LValue TDBase =
5098 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5099 // Fill the data in the resulting kmp_task_t record.
5100 // Copy shareds if there are any.
5101 Address KmpTaskSharedsPtr = Address::invalid();
5102 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5103 KmpTaskSharedsPtr =
5104 Address(CGF.EmitLoadOfScalar(
5105 CGF.EmitLValueForField(
5106 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5107 KmpTaskTShareds)),
5108 Loc),
5109 CGF.getNaturalTypeAlignment(SharedsTy));
5110 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5111 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5112 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5113 }
5114 // Emit initial values for private copies (if any).
5115 TaskResultTy Result;
5116 if (!Privates.empty()) {
5117 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5118 SharedsTy, SharedsPtrTy, Data, Privates,
5119 /*ForDup=*/false);
5120 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5121 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5122 Result.TaskDupFn = emitTaskDupFunction(
5123 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5124 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5125 /*WithLastIter=*/!Data.LastprivateVars.empty());
5126 }
5127 }
5128 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5129 enum { Priority = 0, Destructors = 1 };
5130 // Provide pointer to function with destructors for privates.
5131 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5132 const RecordDecl *KmpCmplrdataUD =
5133 (*FI)->getType()->getAsUnionType()->getDecl();
5134 if (NeedsCleanup) {
5135 llvm::Value *DestructorFn = emitDestructorsFunction(
5136 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5137 KmpTaskTWithPrivatesQTy);
5138 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5139 LValue DestructorsLV = CGF.EmitLValueForField(
5140 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5141 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5142 DestructorFn, KmpRoutineEntryPtrTy),
5143 DestructorsLV);
5144 }
5145 // Set priority.
5146 if (Data.Priority.getInt()) {
5147 LValue Data2LV = CGF.EmitLValueForField(
5148 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5149 LValue PriorityLV = CGF.EmitLValueForField(
5150 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5151 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5152 }
5153 Result.NewTask = NewTask;
5154 Result.TaskEntry = TaskEntry;
5155 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5156 Result.TDBase = TDBase;
5157 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5158 return Result;
5159 }
5160
5161 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5162 const OMPExecutableDirective &D,
5163 llvm::Function *TaskFunction,
5164 QualType SharedsTy, Address Shareds,
5165 const Expr *IfCond,
5166 const OMPTaskDataTy &Data) {
5167 if (!CGF.HaveInsertPoint())
5168 return;
5169
5170 TaskResultTy Result =
5171 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5172 llvm::Value *NewTask = Result.NewTask;
5173 llvm::Function *TaskEntry = Result.TaskEntry;
5174 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5175 LValue TDBase = Result.TDBase;
5176 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5177 ASTContext &C = CGM.getContext();
5178 // Process list of dependences.
5179 Address DependenciesArray = Address::invalid();
5180 unsigned NumDependencies = Data.Dependences.size();
5181 if (NumDependencies) {
5182 // Dependence kind for RTL.
5183 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5184 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5185 RecordDecl *KmpDependInfoRD;
5186 QualType FlagsTy =
5187 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5188 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5189 if (KmpDependInfoTy.isNull()) {
5190 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5191 KmpDependInfoRD->startDefinition();
5192 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5193 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5194 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5195 KmpDependInfoRD->completeDefinition();
5196 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5197 } else {
5198 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5199 }
5200 // Define type kmp_depend_info[<Dependences.size()>];
5201 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5202 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5203 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5204 // kmp_depend_info[<Dependences.size()>] deps;
5205 DependenciesArray =
5206 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5207 for (unsigned I = 0; I < NumDependencies; ++I) {
5208 const Expr *E = Data.Dependences[I].second;
5209 LValue Addr = CGF.EmitLValue(E);
5210 llvm::Value *Size;
5211 QualType Ty = E->getType();
5212 if (const auto *ASE =
5213 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5214 LValue UpAddrLVal =
5215 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5216 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5217 UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5218 llvm::Value *LowIntPtr =
5219 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
5220 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5221 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5222 } else {
5223 Size = CGF.getTypeSize(Ty);
5224 }
5225 LValue Base = CGF.MakeAddrLValue(
5226 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5227 KmpDependInfoTy);
5228 // deps[i].base_addr = &<Dependences[i].second>;
5229 LValue BaseAddrLVal = CGF.EmitLValueForField(
5230 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5231 CGF.EmitStoreOfScalar(
5232 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
5233 BaseAddrLVal);
5234 // deps[i].len = sizeof(<Dependences[i].second>);
5235 LValue LenLVal = CGF.EmitLValueForField(
5236 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5237 CGF.EmitStoreOfScalar(Size, LenLVal);
5238 // deps[i].flags = <Dependences[i].first>;
5239 RTLDependenceKindTy DepKind;
5240 switch (Data.Dependences[I].first) {
5241 case OMPC_DEPEND_in:
5242 DepKind = DepIn;
5243 break;
5244 // Out and InOut dependencies must use the same code.
5245 case OMPC_DEPEND_out:
5246 case OMPC_DEPEND_inout:
5247 DepKind = DepInOut;
5248 break;
5249 case OMPC_DEPEND_mutexinoutset:
5250 DepKind = DepMutexInOutSet;
5251 break;
5252 case OMPC_DEPEND_source:
5253 case OMPC_DEPEND_sink:
5254 case OMPC_DEPEND_unknown:
5255 llvm_unreachable("Unknown task dependence type");
5256 }
5257 LValue FlagsLVal = CGF.EmitLValueForField(
5258 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5259 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5260 FlagsLVal);
5261 }
5262 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5263 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5264 }
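  // Illustrative sketch: for '#pragma omp task depend(in : a) depend(inout : b)'
  // the loop above fills the temporary array roughly as
  //   deps[0] = { (intptr_t)&a, sizeof(a), DepIn    /*0x1*/ };
  //   deps[1] = { (intptr_t)&b, sizeof(b), DepInOut /*0x3*/ };
  // and DependenciesArray becomes the void* address of deps[0].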
5265
5266 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5267 // libcall.
5268 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5269 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5270 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5271 // list is not empty
5272 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5273 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5274 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5275 llvm::Value *DepTaskArgs[7];
5276 if (NumDependencies) {
5277 DepTaskArgs[0] = UpLoc;
5278 DepTaskArgs[1] = ThreadID;
5279 DepTaskArgs[2] = NewTask;
5280 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5281 DepTaskArgs[4] = DependenciesArray.getPointer();
5282 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5283 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5284 }
5285 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5286 &TaskArgs,
5287 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5288 if (!Data.Tied) {
5289 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5290 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5291 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5292 }
5293 if (NumDependencies) {
5294 CGF.EmitRuntimeCall(
5295 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5296 } else {
5297 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5298 TaskArgs);
5299 }
5300     // If the parent region is untied, emit the switch for the untied task.
5301 if (auto *Region =
5302 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5303 Region->emitUntiedSwitch(CGF);
5304 };
5305
5306 llvm::Value *DepWaitTaskArgs[6];
5307 if (NumDependencies) {
5308 DepWaitTaskArgs[0] = UpLoc;
5309 DepWaitTaskArgs[1] = ThreadID;
5310 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5311 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5312 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5313 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5314 }
5315 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5316 NumDependencies, &DepWaitTaskArgs,
5317 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5318 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5319 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5320 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5321 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5322 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5323 // is specified.
5324 if (NumDependencies)
5325 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5326 DepWaitTaskArgs);
5327 // Call proxy_task_entry(gtid, new_task);
5328 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5329 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5330 Action.Enter(CGF);
5331 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5332 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5333 OutlinedFnArgs);
5334 };
5335
5336 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5337 // kmp_task_t *new_task);
5338 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5339 // kmp_task_t *new_task);
5340 RegionCodeGenTy RCG(CodeGen);
5341 CommonActionTy Action(
5342 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5343 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5344 RCG.setAction(Action);
5345 RCG(CGF);
5346 };
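  // Sketch of what ElseCodeGen expands to when the if-clause is false:
  //   if (ndeps) __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, nullptr);
  //   __kmpc_omp_task_begin_if0(&loc, gtid, new_task);
  //   .omp_task_entry.(gtid, new_task);   // executed immediately, undeferred
  //   __kmpc_omp_task_complete_if0(&loc, gtid, new_task);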
5347
5348 if (IfCond) {
5349 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5350 } else {
5351 RegionCodeGenTy ThenRCG(ThenCodeGen);
5352 ThenRCG(CGF);
5353 }
5354 }
5355
5356 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5357 const OMPLoopDirective &D,
5358 llvm::Function *TaskFunction,
5359 QualType SharedsTy, Address Shareds,
5360 const Expr *IfCond,
5361 const OMPTaskDataTy &Data) {
5362 if (!CGF.HaveInsertPoint())
5363 return;
5364 TaskResultTy Result =
5365 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5366 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5367 // libcall.
5368 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5369 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5370 // sched, kmp_uint64 grainsize, void *task_dup);
5371 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5372 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5373 llvm::Value *IfVal;
5374 if (IfCond) {
5375 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5376 /*isSigned=*/true);
5377 } else {
5378 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5379 }
5380
5381 LValue LBLVal = CGF.EmitLValueForField(
5382 Result.TDBase,
5383 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5384 const auto *LBVar =
5385 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5386 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5387 LBLVal.getQuals(),
5388 /*IsInitializer=*/true);
5389 LValue UBLVal = CGF.EmitLValueForField(
5390 Result.TDBase,
5391 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5392 const auto *UBVar =
5393 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5394 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5395 UBLVal.getQuals(),
5396 /*IsInitializer=*/true);
5397 LValue StLVal = CGF.EmitLValueForField(
5398 Result.TDBase,
5399 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5400 const auto *StVar =
5401 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5402 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5403 StLVal.getQuals(),
5404 /*IsInitializer=*/true);
5405 // Store reductions address.
5406 LValue RedLVal = CGF.EmitLValueForField(
5407 Result.TDBase,
5408 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5409 if (Data.Reductions) {
5410 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5411 } else {
5412 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5413 CGF.getContext().VoidPtrTy);
5414 }
5415 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5416 llvm::Value *TaskArgs[] = {
5417 UpLoc,
5418 ThreadID,
5419 Result.NewTask,
5420 IfVal,
5421 LBLVal.getPointer(CGF),
5422 UBLVal.getPointer(CGF),
5423 CGF.EmitLoadOfScalar(StLVal, Loc),
5424 llvm::ConstantInt::getSigned(
5425 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5426 llvm::ConstantInt::getSigned(
5427 CGF.IntTy, Data.Schedule.getPointer()
5428 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5429 : NoSchedule),
5430 Data.Schedule.getPointer()
5431 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5432 /*isSigned=*/false)
5433 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5434 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5435 Result.TaskDupFn, CGF.VoidPtrTy)
5436 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5437 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5438 }
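// Illustrative sketch: for '#pragma omp taskloop grainsize(4)' the call above
// becomes roughly
//   __kmpc_taskloop(&loc, gtid, new_task, /*if_val=*/1, &task->lb, &task->ub,
//                   task->st, /*nogroup=*/1, /*sched=*/Grainsize /*1*/,
//                   /*grainsize=*/4, /*task_dup=*/nullptr);
// with 'num_tasks(n)' selecting sched == NumTasks (2) instead.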
5439
5440 /// Emit the reduction operation for each element of an array (required for
5441 /// array sections): LHS op= RHS.
5442 /// \param Type Type of the array.
5443 /// \param LHSVar Variable on the left side of the reduction operation
5444 /// (references an element of the array in the original variable).
5445 /// \param RHSVar Variable on the right side of the reduction operation
5446 /// (references an element of the array in the original variable).
5447 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5448 /// RHSVar.
5449 static void EmitOMPAggregateReduction(
5450 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5451 const VarDecl *RHSVar,
5452 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5453 const Expr *, const Expr *)> &RedOpGen,
5454 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5455 const Expr *UpExpr = nullptr) {
5456   // Perform the element-by-element reduction.
5457 QualType ElementTy;
5458 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5459 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5460
5461 // Drill down to the base element type on both arrays.
5462 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5463 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5464
5465 llvm::Value *RHSBegin = RHSAddr.getPointer();
5466 llvm::Value *LHSBegin = LHSAddr.getPointer();
5467 // Cast from pointer to array type to pointer to single element.
5468 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5469 // The basic structure here is a while-do loop.
5470 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5471 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5472 llvm::Value *IsEmpty =
5473 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5474 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5475
5476 // Enter the loop body, making that address the current address.
5477 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5478 CGF.EmitBlock(BodyBB);
5479
5480 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5481
5482 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5483 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5484 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5485 Address RHSElementCurrent =
5486 Address(RHSElementPHI,
5487 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5488
5489 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5490 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5491 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5492 Address LHSElementCurrent =
5493 Address(LHSElementPHI,
5494 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5495
5496 // Emit copy.
5497 CodeGenFunction::OMPPrivateScope Scope(CGF);
5498 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5499 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5500 Scope.Privatize();
5501 RedOpGen(CGF, XExpr, EExpr, UpExpr);
5502 Scope.ForceCleanup();
5503
5504 // Shift the address forward by one element.
5505 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5506 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5507 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5508 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5509 // Check whether we've reached the end.
5510 llvm::Value *Done =
5511 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5512 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5513 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5514 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5515
5516 // Done.
5517 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5518 }
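// In C-like form, the control flow emitted above is a guarded do-while loop
// (a sketch; the labels mirror the basic-block names used above):
//   if (lhs == lhs_end) goto done;        // omp.arraycpy.isempty
//   do {                                  // omp.arraycpy.body
//     <RedOpGen applied to *lhs, *rhs>;
//     ++lhs; ++rhs;                       // omp.arraycpy.{dest,src}.element
//   } while (lhs != lhs_end);             // omp.arraycpy.done
//   done:;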
5519
5520 /// Emit the reduction combiner. If the combiner is a simple expression, emit
5521 /// it as is; otherwise treat it as the combiner of a UDR decl and emit it as
5522 /// a call to the UDR combiner function.
5523 static void emitReductionCombiner(CodeGenFunction &CGF,
5524 const Expr *ReductionOp) {
5525 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5526 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5527 if (const auto *DRE =
5528 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5529 if (const auto *DRD =
5530 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5531 std::pair<llvm::Function *, llvm::Function *> Reduction =
5532 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5533 RValue Func = RValue::get(Reduction.first);
5534 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5535 CGF.EmitIgnoredExpr(ReductionOp);
5536 return;
5537 }
5538 CGF.EmitIgnoredExpr(ReductionOp);
5539 }
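// Illustrative sketch (identifiers hypothetical): for 'reduction(+ : x)' the
// ReductionOp is an ordinary expression and is emitted unchanged, e.g.
//   x_lhs = x_lhs + x_rhs;
// whereas for a user-defined reduction the opaque callee above is remapped so
// that the same call expression lowers to
//   .omp.udr.combiner(&x_lhs, &x_rhs);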
5540
5541 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5542 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5543 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5544 ArrayRef<const Expr *> ReductionOps) {
5545 ASTContext &C = CGM.getContext();
5546
5547 // void reduction_func(void *LHSArg, void *RHSArg);
5548 FunctionArgList Args;
5549 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5550 ImplicitParamDecl::Other);
5551 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5552 ImplicitParamDecl::Other);
5553 Args.push_back(&LHSArg);
5554 Args.push_back(&RHSArg);
5555 const auto &CGFI =
5556 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5557 std::string Name = getName({"omp", "reduction", "reduction_func"});
5558 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5559 llvm::GlobalValue::InternalLinkage, Name,
5560 &CGM.getModule());
5561 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5562 Fn->setDoesNotRecurse();
5563 CodeGenFunction CGF(CGM);
5564 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5565
5566 // Dst = (void*[n])(LHSArg);
5567 // Src = (void*[n])(RHSArg);
5568 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5569 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5570 ArgsType), CGF.getPointerAlign());
5571 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5572 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5573 ArgsType), CGF.getPointerAlign());
5574
5575 // ...
5576 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5577 // ...
5578 CodeGenFunction::OMPPrivateScope Scope(CGF);
5579 auto IPriv = Privates.begin();
5580 unsigned Idx = 0;
5581 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5582 const auto *RHSVar =
5583 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5584 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5585 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5586 });
5587 const auto *LHSVar =
5588 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5589 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5590 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5591 });
5592 QualType PrivTy = (*IPriv)->getType();
5593 if (PrivTy->isVariablyModifiedType()) {
5594 // Get array size and emit VLA type.
5595 ++Idx;
5596 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5597 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5598 const VariableArrayType *VLA =
5599 CGF.getContext().getAsVariableArrayType(PrivTy);
5600 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5601 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5602 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5603 CGF.EmitVariablyModifiedType(PrivTy);
5604 }
5605 }
5606 Scope.Privatize();
5607 IPriv = Privates.begin();
5608 auto ILHS = LHSExprs.begin();
5609 auto IRHS = RHSExprs.begin();
5610 for (const Expr *E : ReductionOps) {
5611 if ((*IPriv)->getType()->isArrayType()) {
5612 // Emit reduction for array section.
5613 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5614 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5615 EmitOMPAggregateReduction(
5616 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5617 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5618 emitReductionCombiner(CGF, E);
5619 });
5620 } else {
5621 // Emit reduction for array subscript or single variable.
5622 emitReductionCombiner(CGF, E);
5623 }
5624 ++IPriv;
5625 ++ILHS;
5626 ++IRHS;
5627 }
5628 Scope.ForceCleanup();
5629 CGF.FinishFunction();
5630 return Fn;
5631 }
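// A minimal sketch of the function emitted above for a single
// 'reduction(+ : x)' item of type int (names illustrative):
//   void .omp.reduction.reduction_func(void *lhs[1], void *rhs[1]) {
//     *(int *)lhs[0] = *(int *)lhs[0] + *(int *)rhs[0];
//   }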
5632
5633 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5634 const Expr *ReductionOp,
5635 const Expr *PrivateRef,
5636 const DeclRefExpr *LHS,
5637 const DeclRefExpr *RHS) {
5638 if (PrivateRef->getType()->isArrayType()) {
5639 // Emit reduction for array section.
5640 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5641 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5642 EmitOMPAggregateReduction(
5643 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5644 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5645 emitReductionCombiner(CGF, ReductionOp);
5646 });
5647 } else {
5648 // Emit reduction for array subscript or single variable.
5649 emitReductionCombiner(CGF, ReductionOp);
5650 }
5651 }
5652
5653 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5654 ArrayRef<const Expr *> Privates,
5655 ArrayRef<const Expr *> LHSExprs,
5656 ArrayRef<const Expr *> RHSExprs,
5657 ArrayRef<const Expr *> ReductionOps,
5658 ReductionOptionsTy Options) {
5659 if (!CGF.HaveInsertPoint())
5660 return;
5661
5662 bool WithNowait = Options.WithNowait;
5663 bool SimpleReduction = Options.SimpleReduction;
5664
5665   // The following code is emitted for the reduction:
5666 //
5667 // static kmp_critical_name lock = { 0 };
5668 //
5669 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5670 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5671 // ...
5672 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5673 // *(Type<n>-1*)rhs[<n>-1]);
5674 // }
5675 //
5676 // ...
5677 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5678 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5679 // RedList, reduce_func, &<lock>)) {
5680 // case 1:
5681 // ...
5682 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5683 // ...
5684 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5685 // break;
5686 // case 2:
5687 // ...
5688 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5689 // ...
5690 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5691 // break;
5692 // default:;
5693 // }
5694 //
5695   // If SimpleReduction is true, only the following code is generated:
5696 // ...
5697 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5698 // ...
5699
5700 ASTContext &C = CGM.getContext();
5701
5702 if (SimpleReduction) {
5703 CodeGenFunction::RunCleanupsScope Scope(CGF);
5704 auto IPriv = Privates.begin();
5705 auto ILHS = LHSExprs.begin();
5706 auto IRHS = RHSExprs.begin();
5707 for (const Expr *E : ReductionOps) {
5708 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5709 cast<DeclRefExpr>(*IRHS));
5710 ++IPriv;
5711 ++ILHS;
5712 ++IRHS;
5713 }
5714 return;
5715 }
5716
5717 // 1. Build a list of reduction variables.
5718 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5719 auto Size = RHSExprs.size();
5720 for (const Expr *E : Privates) {
5721 if (E->getType()->isVariablyModifiedType())
5722       // Reserve a slot for the array size.
5723 ++Size;
5724 }
5725 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5726 QualType ReductionArrayTy =
5727 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5728 /*IndexTypeQuals=*/0);
5729 Address ReductionList =
5730 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5731 auto IPriv = Privates.begin();
5732 unsigned Idx = 0;
5733 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5734 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5735 CGF.Builder.CreateStore(
5736 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5737 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5738 Elem);
5739 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5740 // Store array size.
5741 ++Idx;
5742 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5743 llvm::Value *Size = CGF.Builder.CreateIntCast(
5744 CGF.getVLASize(
5745 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5746 .NumElts,
5747 CGF.SizeTy, /*isSigned=*/false);
5748 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5749 Elem);
5750 }
5751 }
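  // Sketch of the list built above for 'reduction(+ : x, y)' with scalar items:
  //   void *RedList[2] = { &x_priv, &y_priv };
  // a VLA item additionally stores its element count in the following slot.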
5752
5753 // 2. Emit reduce_func().
5754 llvm::Function *ReductionFn = emitReductionFunction(
5755 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5756 LHSExprs, RHSExprs, ReductionOps);
5757
5758 // 3. Create static kmp_critical_name lock = { 0 };
5759 std::string Name = getName({"reduction"});
5760 llvm::Value *Lock = getCriticalRegionLock(Name);
5761
5762 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5763 // RedList, reduce_func, &<lock>);
5764 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5765 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5766 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5767 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5768 ReductionList.getPointer(), CGF.VoidPtrTy);
5769 llvm::Value *Args[] = {
5770 IdentTLoc, // ident_t *<loc>
5771 ThreadId, // i32 <gtid>
5772 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5773 ReductionArrayTySize, // size_type sizeof(RedList)
5774 RL, // void *RedList
5775 ReductionFn, // void (*) (void *, void *) <reduce_func>
5776 Lock // kmp_critical_name *&<lock>
5777 };
5778 llvm::Value *Res = CGF.EmitRuntimeCall(
5779 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5780 : OMPRTL__kmpc_reduce),
5781 Args);
5782
5783 // 5. Build switch(res)
5784 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5785 llvm::SwitchInst *SwInst =
5786 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5787
5788 // 6. Build case 1:
5789 // ...
5790 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5791 // ...
5792 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5793 // break;
5794 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5795 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5796 CGF.EmitBlock(Case1BB);
5797
5798 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5799 llvm::Value *EndArgs[] = {
5800 IdentTLoc, // ident_t *<loc>
5801 ThreadId, // i32 <gtid>
5802 Lock // kmp_critical_name *&<lock>
5803 };
5804 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5805 CodeGenFunction &CGF, PrePostActionTy &Action) {
5806 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5807 auto IPriv = Privates.begin();
5808 auto ILHS = LHSExprs.begin();
5809 auto IRHS = RHSExprs.begin();
5810 for (const Expr *E : ReductionOps) {
5811 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5812 cast<DeclRefExpr>(*IRHS));
5813 ++IPriv;
5814 ++ILHS;
5815 ++IRHS;
5816 }
5817 };
5818 RegionCodeGenTy RCG(CodeGen);
5819 CommonActionTy Action(
5820 nullptr, llvm::None,
5821 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5822 : OMPRTL__kmpc_end_reduce),
5823 EndArgs);
5824 RCG.setAction(Action);
5825 RCG(CGF);
5826
5827 CGF.EmitBranch(DefaultBB);
5828
5829 // 7. Build case 2:
5830 // ...
5831 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5832 // ...
5833 // break;
5834 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5835 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5836 CGF.EmitBlock(Case2BB);
5837
5838 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5839 CodeGenFunction &CGF, PrePostActionTy &Action) {
5840 auto ILHS = LHSExprs.begin();
5841 auto IRHS = RHSExprs.begin();
5842 auto IPriv = Privates.begin();
5843 for (const Expr *E : ReductionOps) {
5844 const Expr *XExpr = nullptr;
5845 const Expr *EExpr = nullptr;
5846 const Expr *UpExpr = nullptr;
5847 BinaryOperatorKind BO = BO_Comma;
5848 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5849 if (BO->getOpcode() == BO_Assign) {
5850 XExpr = BO->getLHS();
5851 UpExpr = BO->getRHS();
5852 }
5853 }
5854 // Try to emit update expression as a simple atomic.
5855 const Expr *RHSExpr = UpExpr;
5856 if (RHSExpr) {
5857 // Analyze RHS part of the whole expression.
5858 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5859 RHSExpr->IgnoreParenImpCasts())) {
5860 // If this is a conditional operator, analyze its condition for
5861 // min/max reduction operator.
5862 RHSExpr = ACO->getCond();
5863 }
5864 if (const auto *BORHS =
5865 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5866 EExpr = BORHS->getRHS();
5867 BO = BORHS->getOpcode();
5868 }
5869 }
5870 if (XExpr) {
5871 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5872 auto &&AtomicRedGen = [BO, VD,
5873 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5874 const Expr *EExpr, const Expr *UpExpr) {
5875 LValue X = CGF.EmitLValue(XExpr);
5876 RValue E;
5877 if (EExpr)
5878 E = CGF.EmitAnyExpr(EExpr);
5879 CGF.EmitOMPAtomicSimpleUpdateExpr(
5880 X, E, BO, /*IsXLHSInRHSPart=*/true,
5881 llvm::AtomicOrdering::Monotonic, Loc,
5882 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5883 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5884 PrivateScope.addPrivate(
5885 VD, [&CGF, VD, XRValue, Loc]() {
5886 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5887 CGF.emitOMPSimpleStore(
5888 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5889 VD->getType().getNonReferenceType(), Loc);
5890 return LHSTemp;
5891 });
5892 (void)PrivateScope.Privatize();
5893 return CGF.EmitAnyExpr(UpExpr);
5894 });
5895 };
5896 if ((*IPriv)->getType()->isArrayType()) {
5897 // Emit atomic reduction for array section.
5898 const auto *RHSVar =
5899 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5900 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5901 AtomicRedGen, XExpr, EExpr, UpExpr);
5902 } else {
5903 // Emit atomic reduction for array subscript or single variable.
5904 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5905 }
5906 } else {
5907 // Emit as a critical region.
5908 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5909 const Expr *, const Expr *) {
5910 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5911 std::string Name = RT.getName({"atomic_reduction"});
5912 RT.emitCriticalRegion(
5913 CGF, Name,
5914 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5915 Action.Enter(CGF);
5916 emitReductionCombiner(CGF, E);
5917 },
5918 Loc);
5919 };
5920 if ((*IPriv)->getType()->isArrayType()) {
5921 const auto *LHSVar =
5922 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5923 const auto *RHSVar =
5924 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5925 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5926 CritRedGen);
5927 } else {
5928 CritRedGen(CGF, nullptr, nullptr, nullptr);
5929 }
5930 }
5931 ++ILHS;
5932 ++IRHS;
5933 ++IPriv;
5934 }
5935 };
5936 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5937 if (!WithNowait) {
5938 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5939 llvm::Value *EndArgs[] = {
5940 IdentTLoc, // ident_t *<loc>
5941 ThreadId, // i32 <gtid>
5942 Lock // kmp_critical_name *&<lock>
5943 };
5944 CommonActionTy Action(nullptr, llvm::None,
5945 createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5946 EndArgs);
5947 AtomicRCG.setAction(Action);
5948 AtomicRCG(CGF);
5949 } else {
5950 AtomicRCG(CGF);
5951 }
5952
5953 CGF.EmitBranch(DefaultBB);
5954 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5955 }
5956
5957 /// Generates a unique name for artificial threadprivate variables.
5958 /// Format: <Prefix> "." <Decl_mangled_name> "_" <Decl_start_loc_raw_enc>
5959 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5960 const Expr *Ref) {
5961 SmallString<256> Buffer;
5962 llvm::raw_svector_ostream Out(Buffer);
5963 const clang::DeclRefExpr *DE;
5964 const VarDecl *D = ::getBaseDecl(Ref, DE);
5965 if (!D)
5966 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5967 D = D->getCanonicalDecl();
5968 std::string Name = CGM.getOpenMPRuntime().getName(
5969 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5970 Out << Prefix << Name << "_"
5971 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5972 return Out.str();
5973 }
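// E.g. (illustrative; the exact separators come from getName and the trailing
// number is the raw encoding of the declaration's start location): a variable
// 'x' with the "reduction_size" prefix may yield something like
// "reduction_size.x_1234".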
5974
5975 /// Emits reduction initializer function:
5976 /// \code
5977 /// void @.red_init(void* %arg) {
5978 /// %0 = bitcast void* %arg to <type>*
5979 /// store <type> <init>, <type>* %0
5980 /// ret void
5981 /// }
5982 /// \endcode
5983 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5984 SourceLocation Loc,
5985 ReductionCodeGen &RCG, unsigned N) {
5986 ASTContext &C = CGM.getContext();
5987 FunctionArgList Args;
5988 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5989 ImplicitParamDecl::Other);
5990 Args.emplace_back(&Param);
5991 const auto &FnInfo =
5992 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5993 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5994 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5995 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5996 Name, &CGM.getModule());
5997 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5998 Fn->setDoesNotRecurse();
5999 CodeGenFunction CGF(CGM);
6000 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6001 Address PrivateAddr = CGF.EmitLoadOfPointer(
6002 CGF.GetAddrOfLocalVar(&Param),
6003 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6004 llvm::Value *Size = nullptr;
6005   // If the size of the reduction item is non-constant, load it from the
6006   // global threadprivate variable.
6007 if (RCG.getSizes(N).second) {
6008 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6009 CGF, CGM.getContext().getSizeType(),
6010 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6011 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6012 CGM.getContext().getSizeType(), Loc);
6013 }
6014 RCG.emitAggregateType(CGF, N, Size);
6015 LValue SharedLVal;
6016   // If the initializer uses the initializer from the 'declare reduction'
6017   // construct, emit a pointer to the address of the original reduction item
6018   // (required by the reduction initializer).
6019 if (RCG.usesReductionInitializer(N)) {
6020 Address SharedAddr =
6021 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6022 CGF, CGM.getContext().VoidPtrTy,
6023 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6024 SharedAddr = CGF.EmitLoadOfPointer(
6025 SharedAddr,
6026 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6027 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6028 } else {
6029 SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6030 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6031 CGM.getContext().VoidPtrTy);
6032 }
6033 // Emit the initializer:
6034 // %0 = bitcast void* %arg to <type>*
6035 // store <type> <init>, <type>* %0
6036 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6037 [](CodeGenFunction &) { return false; });
6038 CGF.FinishFunction();
6039 return Fn;
6040 }
6041
6042 /// Emits reduction combiner function:
6043 /// \code
6044 /// void @.red_comb(void* %arg0, void* %arg1) {
6045 /// %lhs = bitcast void* %arg0 to <type>*
6046 /// %rhs = bitcast void* %arg1 to <type>*
6047 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6048 /// store <type> %2, <type>* %lhs
6049 /// ret void
6050 /// }
6051 /// \endcode
6052 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6053 SourceLocation Loc,
6054 ReductionCodeGen &RCG, unsigned N,
6055 const Expr *ReductionOp,
6056 const Expr *LHS, const Expr *RHS,
6057 const Expr *PrivateRef) {
6058 ASTContext &C = CGM.getContext();
6059 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6060 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6061 FunctionArgList Args;
6062 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6063 C.VoidPtrTy, ImplicitParamDecl::Other);
6064 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6065 ImplicitParamDecl::Other);
6066 Args.emplace_back(&ParamInOut);
6067 Args.emplace_back(&ParamIn);
6068 const auto &FnInfo =
6069 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6070 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6071 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6072 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6073 Name, &CGM.getModule());
6074 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6075 Fn->setDoesNotRecurse();
6076 CodeGenFunction CGF(CGM);
6077 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6078 llvm::Value *Size = nullptr;
6079   // If the size of the reduction item is non-constant, load it from the
6080   // global threadprivate variable.
6081 if (RCG.getSizes(N).second) {
6082 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6083 CGF, CGM.getContext().getSizeType(),
6084 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6085 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6086 CGM.getContext().getSizeType(), Loc);
6087 }
6088 RCG.emitAggregateType(CGF, N, Size);
6089 // Remap lhs and rhs variables to the addresses of the function arguments.
6090 // %lhs = bitcast void* %arg0 to <type>*
6091 // %rhs = bitcast void* %arg1 to <type>*
6092 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6093 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6094 // Pull out the pointer to the variable.
6095 Address PtrAddr = CGF.EmitLoadOfPointer(
6096 CGF.GetAddrOfLocalVar(&ParamInOut),
6097 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6098 return CGF.Builder.CreateElementBitCast(
6099 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6100 });
6101 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6102 // Pull out the pointer to the variable.
6103 Address PtrAddr = CGF.EmitLoadOfPointer(
6104 CGF.GetAddrOfLocalVar(&ParamIn),
6105 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6106 return CGF.Builder.CreateElementBitCast(
6107 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6108 });
6109 PrivateScope.Privatize();
6110 // Emit the combiner body:
6111 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6112 // store <type> %2, <type>* %lhs
6113 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6114 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6115 cast<DeclRefExpr>(RHS));
6116 CGF.FinishFunction();
6117 return Fn;
6118 }
6119
6120 /// Emits reduction finalizer function:
6121 /// \code
6122 /// void @.red_fini(void* %arg) {
6123 /// %0 = bitcast void* %arg to <type>*
6124 /// <destroy>(<type>* %0)
6125 /// ret void
6126 /// }
6127 /// \endcode
6128 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6129 SourceLocation Loc,
6130 ReductionCodeGen &RCG, unsigned N) {
6131 if (!RCG.needCleanups(N))
6132 return nullptr;
6133 ASTContext &C = CGM.getContext();
6134 FunctionArgList Args;
6135 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6136 ImplicitParamDecl::Other);
6137 Args.emplace_back(&Param);
6138 const auto &FnInfo =
6139 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6140 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6141 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6142 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6143 Name, &CGM.getModule());
6144 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6145 Fn->setDoesNotRecurse();
6146 CodeGenFunction CGF(CGM);
6147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6148 Address PrivateAddr = CGF.EmitLoadOfPointer(
6149 CGF.GetAddrOfLocalVar(&Param),
6150 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6151 llvm::Value *Size = nullptr;
6152   // If the size of the reduction item is non-constant, load it from the
6153   // global threadprivate variable.
6154 if (RCG.getSizes(N).second) {
6155 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6156 CGF, CGM.getContext().getSizeType(),
6157 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6158 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6159 CGM.getContext().getSizeType(), Loc);
6160 }
6161 RCG.emitAggregateType(CGF, N, Size);
6162 // Emit the finalizer body:
6163 // <destroy>(<type>* %0)
6164 RCG.emitCleanups(CGF, N, PrivateAddr);
6165 CGF.FinishFunction(Loc);
6166 return Fn;
6167 }
6168
6169 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6170 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6171 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6172 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6173 return nullptr;
6174
6175 // Build typedef struct:
6176 // kmp_task_red_input {
6177 // void *reduce_shar; // shared reduction item
6178 // size_t reduce_size; // size of data item
6179 // void *reduce_init; // data initialization routine
6180 // void *reduce_fini; // data finalization routine
6181 // void *reduce_comb; // data combiner routine
6182 // kmp_task_red_flags_t flags; // flags for additional info from compiler
6183 // } kmp_task_red_input_t;
6184 ASTContext &C = CGM.getContext();
6185 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6186 RD->startDefinition();
6187 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6188 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6189 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6190 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6191 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6192 const FieldDecl *FlagsFD = addFieldToRecordDecl(
6193 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6194 RD->completeDefinition();
6195 QualType RDType = C.getRecordType(RD);
6196 unsigned Size = Data.ReductionVars.size();
6197 llvm::APInt ArraySize(/*numBits=*/64, Size);
6198 QualType ArrayRDType = C.getConstantArrayType(
6199 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6200 // kmp_task_red_input_t .rd_input.[Size];
6201 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6202 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6203 Data.ReductionOps);
6204 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6205 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6206 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6207 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6208 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6209 TaskRedInput.getPointer(), Idxs,
6210 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6211 ".rd_input.gep.");
6212 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6213 // ElemLVal.reduce_shar = &Shareds[Cnt];
6214 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6215 RCG.emitSharedLValue(CGF, Cnt);
6216 llvm::Value *CastedShared =
6217 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6218 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6219 RCG.emitAggregateType(CGF, Cnt);
6220 llvm::Value *SizeValInChars;
6221 llvm::Value *SizeVal;
6222 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6223     // We use delayed creation/initialization for VLAs, array sections and
6224     // custom reduction initializations. This is required because the
6225     // runtime provides no way to pass the sizes of VLAs/array sections to
6226     // the initializer/combiner/finalizer functions, nor a pointer to the
6227     // original reduction item to the initializer. Instead, threadprivate
6228     // global variables store these values for use in those functions.
6229 bool DelayedCreation = !!SizeVal;
6230 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6231 /*isSigned=*/false);
6232 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6233 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6234 // ElemLVal.reduce_init = init;
6235 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6236 llvm::Value *InitAddr =
6237 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6238 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6239 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6240 // ElemLVal.reduce_fini = fini;
6241 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6242 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6243 llvm::Value *FiniAddr = Fini
6244 ? CGF.EmitCastToVoidPtr(Fini)
6245 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6246 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6247 // ElemLVal.reduce_comb = comb;
6248 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6249 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6250 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6251 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6252 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6253 // ElemLVal.flags = 0;
6254 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6255 if (DelayedCreation) {
6256 CGF.EmitStoreOfScalar(
6257 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6258 FlagsLVal);
6259 } else
6260 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6261 FlagsLVal.getType());
6262 }
6263 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6264 // *data);
6265 llvm::Value *Args[] = {
6266 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6267 /*isSigned=*/true),
6268 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6269 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6270 CGM.VoidPtrTy)};
6271 return CGF.EmitRuntimeCall(
6272 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6273 }
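// Illustrative sketch: for '#pragma omp taskgroup task_reduction(+ : x)' the
// loop above fills a single element, roughly
//   .rd_input.[0] = { /*reduce_shar=*/&x, /*reduce_size=*/sizeof(x),
//                     /*reduce_init=*/.red_init., /*reduce_fini=*/null or
//                     .red_fini., /*reduce_comb=*/.red_comb.,
//                     /*flags=*/0 (or 1 for delayed creation) };
// and the result is __kmpc_task_reduction_init(gtid, /*num_data=*/1, .rd_input.).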
6274
6275 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6276 SourceLocation Loc,
6277 ReductionCodeGen &RCG,
6278 unsigned N) {
6279 auto Sizes = RCG.getSizes(N);
6280   // Emit the threadprivate global variable if the size is non-constant
6281   // (Sizes.second != nullptr).
6282 if (Sizes.second) {
6283 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6284 /*isSigned=*/false);
6285 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6286 CGF, CGM.getContext().getSizeType(),
6287 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6288 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6289 }
6290   // Store the address of the original reduction item if a custom initializer is used.
6291 if (RCG.usesReductionInitializer(N)) {
6292 Address SharedAddr = getAddrOfArtificialThreadPrivate(
6293 CGF, CGM.getContext().VoidPtrTy,
6294 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6295 CGF.Builder.CreateStore(
6296 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6297 RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6298 SharedAddr, /*IsVolatile=*/false);
6299 }
6300 }
6301
6302 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6303 SourceLocation Loc,
6304 llvm::Value *ReductionsPtr,
6305 LValue SharedLVal) {
6306 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6307 // *d);
6308 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6309 CGM.IntTy,
6310 /*isSigned=*/true),
6311 ReductionsPtr,
6312 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6313 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6314 return Address(
6315 CGF.EmitRuntimeCall(
6316 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6317 SharedLVal.getAlignment());
6318 }
6319
6320 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6321 SourceLocation Loc) {
6322 if (!CGF.HaveInsertPoint())
6323 return;
6324 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6325 // global_tid);
6326 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6327 // Ignore return result until untied tasks are supported.
6328 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6329 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6330 Region->emitUntiedSwitch(CGF);
6331 }
6332
6333 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6334 OpenMPDirectiveKind InnerKind,
6335 const RegionCodeGenTy &CodeGen,
6336 bool HasCancel) {
6337 if (!CGF.HaveInsertPoint())
6338 return;
6339 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6340 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6341 }
6342
6343 namespace {
6344 enum RTCancelKind {
6345 CancelNoreq = 0,
6346 CancelParallel = 1,
6347 CancelLoop = 2,
6348 CancelSections = 3,
6349 CancelTaskgroup = 4
6350 };
6351 } // anonymous namespace
6352
6353 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6354 RTCancelKind CancelKind = CancelNoreq;
6355 if (CancelRegion == OMPD_parallel)
6356 CancelKind = CancelParallel;
6357 else if (CancelRegion == OMPD_for)
6358 CancelKind = CancelLoop;
6359 else if (CancelRegion == OMPD_sections)
6360 CancelKind = CancelSections;
6361 else {
6362 assert(CancelRegion == OMPD_taskgroup);
6363 CancelKind = CancelTaskgroup;
6364 }
6365 return CancelKind;
6366 }
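// E.g. '#pragma omp cancel for' maps to CancelLoop (2) and '#pragma omp
// cancel sections' maps to CancelSections (3); the same mapping feeds both
// __kmpc_cancel and __kmpc_cancellationpoint below.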
6367
6368 void CGOpenMPRuntime::emitCancellationPointCall(
6369 CodeGenFunction &CGF, SourceLocation Loc,
6370 OpenMPDirectiveKind CancelRegion) {
6371 if (!CGF.HaveInsertPoint())
6372 return;
6373 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6374 // global_tid, kmp_int32 cncl_kind);
6375 if (auto *OMPRegionInfo =
6376 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6377 // For 'cancellation point taskgroup', the task region info may not have a
6378 // cancel. This may instead happen in another adjacent task.
6379 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6380 llvm::Value *Args[] = {
6381 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6382 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6383 // Ignore return result until untied tasks are supported.
6384 llvm::Value *Result = CGF.EmitRuntimeCall(
6385 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6386 // if (__kmpc_cancellationpoint()) {
6387 // exit from construct;
6388 // }
6389 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6390 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6391 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6392 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6393 CGF.EmitBlock(ExitBB);
6394 // exit from construct;
6395 CodeGenFunction::JumpDest CancelDest =
6396 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6397 CGF.EmitBranchThroughCleanup(CancelDest);
6398 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6399 }
6400 }
6401 }
6402
6403 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6404 const Expr *IfCond,
6405 OpenMPDirectiveKind CancelRegion) {
6406 if (!CGF.HaveInsertPoint())
6407 return;
6408 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6409 // kmp_int32 cncl_kind);
6410 if (auto *OMPRegionInfo =
6411 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6412 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6413 PrePostActionTy &) {
6414 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6415 llvm::Value *Args[] = {
6416 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6417 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6418 // Ignore return result until untied tasks are supported.
6419 llvm::Value *Result = CGF.EmitRuntimeCall(
6420 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6421 // if (__kmpc_cancel()) {
6422 // exit from construct;
6423 // }
6424 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6425 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6426 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6427 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6428 CGF.EmitBlock(ExitBB);
6429 // exit from construct;
6430 CodeGenFunction::JumpDest CancelDest =
6431 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6432 CGF.EmitBranchThroughCleanup(CancelDest);
6433 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6434 };
6435 if (IfCond) {
6436 emitIfClause(CGF, IfCond, ThenGen,
6437 [](CodeGenFunction &, PrePostActionTy &) {});
6438 } else {
6439 RegionCodeGenTy ThenRCG(ThenGen);
6440 ThenRCG(CGF);
6441 }
6442 }
6443 }
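
// Note that for '#pragma omp cancel <region> if(Cond)' the runtime call is
// guarded by the if clause: the else branch is a no-op, so __kmpc_cancel is
// only invoked when Cond evaluates to true.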

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep it alive and could therefore inline the host
  // function if proven worthwhile during optimization. On the other hand, if
  // emitting code for the device, the ID has to be the function address so
  // that it can be retrieved from the offloading entry and launched by the
  // runtime library. We also mark the outlined function to have external
  // linkage in case we are emitting code for the device, because these
  // functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
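
// For instance, a target region at line 42 of a file with device ID 0xd and
// file ID 0xabcd, enclosed in a function mangled as '_Z3foov', gets the entry
// name '__omp_offloading_d_abcd__Z3foov_l42'; on the host the region ID is
// the one-byte global built above from that name and a "region_id" suffix.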

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
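
// For example, '3 + 4' or a side-effect-free cast is trivial and can be
// skipped when looking for the single child statement below, while a call to
// an arbitrary user function is not.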

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
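
// Given a body like '{ int x = 0; ; #pragma omp teams ... }' this returns the
// teams directive: the trivial declaration and the null statement are skipped
// and a single non-ignorable child remains.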

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
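
// Examples of the value emitted above on the host:
//   #pragma omp target teams num_teams(8) -> 8 (cast to i32)
//   #pragma omp target parallel           -> 1 (a single team)
//   #pragma omp target teams              -> 0 (let the runtime choose)
//   #pragma omp target                    -> nullptr, unless a nested
//                                            construct determines the value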

static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the if clause. If present, the number of threads is calculated
      // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause iff the if clause was not
      // specified or did not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
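
// E.g. for '#pragma omp parallel if(c) num_threads(n)' nested in a target
// region with default thread limit L, this emits c ? min(n, L) : 1, where the
// min is the unsigned select on DefaultThreadLimitVal above.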

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If present, the number of threads is calculated
    // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
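
// E.g. on the host, '#pragma omp target parallel thread_limit(8)
// num_threads(n)' emits min(n, 8) as the number of threads for the offloading
// runtime, and an if clause would further select between that value and 1.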

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
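
  // With OMP_MAP_MEMBER_OF == 0xffff000000000000 the first set bit is bit 48,
  // so getFlagMemberOffset() returns 48 and the MEMBER_OF field for the n-th
  // (1-based) argument is encoded as (uint64_t)n << 48; see getMemberOfFlag()
  // below.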

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information about a map clause component list: its expression components,
  /// the map type and modifiers, and whether a device pointer has to be
  /// returned for it.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
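
  // For example, for an array section 'a[lb:len]' of element type T the size
  // is len * sizeof(T); for 'a[lb:]' it is sizeof(a) - lb * sizeof(T), clamped
  // to zero; for 'a[:]' it is sizeof(a); and for a section with no colon
  // (a single element) it is sizeof(T).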

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library: if
      // we don't pass any bits, alloc/release is what the runtime is going to
      // do. Therefore, we don't need to signal anything for these two type
      // modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    return Bits;
  }
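
  // For example, an explicit 'map(always, tofrom: x)' yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS, plus OMP_MAP_PTR_AND_OBJ
  // and/or OMP_MAP_TARGET_PARAM when the caller requests them.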

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
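
  // For example, 'a[3:1]' is not a final array section (its length is the
  // constant 1), while 'a[3:n]' is, because its length cannot be proved to be
  // one at compile time.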

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member
    // expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct), mark
      // it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                         .getAddress(CGF);

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }
7764
getMemberOfFlag(unsigned Position)7765 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7766 // Rotate by getFlagMemberOffset() bits.
7767 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7768 << getFlagMemberOffset());
7769 }
7770
setCorrectMemberOfFlag(OpenMPOffloadMappingFlags & Flags,OpenMPOffloadMappingFlags MemberOfFlag)7771 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7772 OpenMPOffloadMappingFlags MemberOfFlag) {
7773 // If the entry is PTR_AND_OBJ but has not been marked with the special
7774 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7775 // marked as MEMBER_OF.
7776 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7777 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7778 return;
7779
7780 // Reset the placeholder value to prepare the flag for the assignment of the
7781 // proper MEMBER_OF value.
7782 Flags &= ~OMP_MAP_MEMBER_OF;
7783 Flags |= MemberOfFlag;
7784 }
7785
getPlainLayout(const CXXRecordDecl * RD,llvm::SmallVectorImpl<const FieldDecl * > & Layout,bool AsBase) const7786 void getPlainLayout(const CXXRecordDecl *RD,
7787 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7788 bool AsBase) const {
7789 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7790
7791 llvm::StructType *St =
7792 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7793
7794 unsigned NumElements = St->getNumElements();
7795 llvm::SmallVector<
7796 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7797 RecordLayout(NumElements);
7798
7799 // Fill bases.
7800 for (const auto &I : RD->bases()) {
7801 if (I.isVirtual())
7802 continue;
7803 const auto *Base = I.getType()->getAsCXXRecordDecl();
7804 // Ignore empty bases.
7805 if (Base->isEmpty() || CGF.getContext()
7806 .getASTRecordLayout(Base)
7807 .getNonVirtualSize()
7808 .isZero())
7809 continue;
7810
7811 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7812 RecordLayout[FieldIndex] = Base;
7813 }
7814 // Fill in virtual bases.
7815 for (const auto &I : RD->vbases()) {
7816 const auto *Base = I.getType()->getAsCXXRecordDecl();
7817 // Ignore empty bases.
7818 if (Base->isEmpty())
7819 continue;
7820 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7821 if (RecordLayout[FieldIndex])
7822 continue;
7823 RecordLayout[FieldIndex] = Base;
7824 }
7825 // Fill in all the fields.
7826 assert(!RD->isUnion() && "Unexpected union.");
7827 for (const auto *Field : RD->fields()) {
7828 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7829 // will fill in later.)
7830 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7831 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7832 RecordLayout[FieldIndex] = Field;
7833 }
7834 }
7835 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7836 &Data : RecordLayout) {
7837 if (Data.isNull())
7838 continue;
7839 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7840 getPlainLayout(Base, Layout, /*AsBase=*/true);
7841 else
7842 Layout.push_back(Data.get<const FieldDecl *>());
7843 }
7844 }
7845
7846 public:
MappableExprsHandler(const OMPExecutableDirective & Dir,CodeGenFunction & CGF)7847 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7848 : CurDir(&Dir), CGF(CGF) {
7849 // Extract firstprivate clause information.
7850 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7851 for (const auto *D : C->varlists())
7852 FirstPrivateDecls.try_emplace(
7853 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7854 // Extract device pointer clause information.
7855 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7856 for (auto L : C->component_lists())
7857 DevPointersMap[L.first].push_back(L.second);
7858 }
7859
7860 /// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl & Dir,CodeGenFunction & CGF)7861 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7862 : CurDir(&Dir), CGF(CGF) {}
7863
7864 /// Generate code for the combined entry if we have a partially mapped struct
7865 /// and take care of the mapping flags of the arguments corresponding to
7866 /// individual struct members.
emitCombinedEntry(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,MapFlagsArrayTy & CurTypes,const StructRangeInfoTy & PartialStruct) const7867 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7868 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7869 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7870 const StructRangeInfoTy &PartialStruct) const {
7871 // Base is the base of the struct
7872 BasePointers.push_back(PartialStruct.Base.getPointer());
7873 // Pointer is the address of the lowest element
7874 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7875 Pointers.push_back(LB);
7876 // Size is (addr of {highest+1} element) - (addr of lowest element)
7877 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7878 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7879 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7880 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7881 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7882 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7883 /*isSigned=*/false);
7884 Sizes.push_back(Size);
7885 // Map type is always TARGET_PARAM
7886 Types.push_back(OMP_MAP_TARGET_PARAM);
7887 // Remove TARGET_PARAM flag from the first element
7888 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7889
7890 // All other current entries will be MEMBER_OF the combined entry
7891 // (except for PTR_AND_OBJ entries which do not have a placeholder value
7892 // 0xFFFF in the MEMBER_OF field).
7893 OpenMPOffloadMappingFlags MemberOfFlag =
7894 getMemberOfFlag(BasePointers.size() - 1);
7895 for (auto &M : CurTypes)
7896 setCorrectMemberOfFlag(M, MemberOfFlag);
7897 }
7898
7899 /// Generate all the base pointers, section pointers, sizes and map
7900 /// types for the extracted mappable expressions. Also, for each item that
7901 /// relates with a device pointer, a pair of the relevant declaration and
7902 /// index where it occurs is appended to the device pointers info array.
generateAllInfo(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const7903 void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7904 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7905 MapFlagsArrayTy &Types) const {
7906 // We have to process the component lists that relate with the same
7907 // declaration in a single chunk so that we can generate the map flags
7908 // correctly. Therefore, we organize all lists in a map.
7909 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7910
7911 // Helper function to fill the information map for the different supported
7912 // clauses.
7913 auto &&InfoGen = [&Info](
7914 const ValueDecl *D,
7915 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7916 OpenMPMapClauseKind MapType,
7917 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7918 bool ReturnDevicePointer, bool IsImplicit) {
7919 const ValueDecl *VD =
7920 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7921 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7922 IsImplicit);
7923 };
7924
7925 assert(CurDir.is<const OMPExecutableDirective *>() &&
7926 "Expect a executable directive");
7927 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7928 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7929 for (const auto L : C->component_lists()) {
7930 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7931 /*ReturnDevicePointer=*/false, C->isImplicit());
7932 }
7933 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7934 for (const auto L : C->component_lists()) {
7935 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7936 /*ReturnDevicePointer=*/false, C->isImplicit());
7937 }
7938 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7939 for (const auto L : C->component_lists()) {
7940 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7941 /*ReturnDevicePointer=*/false, C->isImplicit());
7942 }
7943
7944 // Look at the use_device_ptr clause information and mark the existing map
7945 // entries as such. If there is no map information for an entry in the
7946 // use_device_ptr list, we create one with map type 'alloc' and zero size
7947 // section. It is the user fault if that was not mapped before. If there is
7948 // no map information and the pointer is a struct member, then we defer the
7949 // emission of that entry until the whole struct has been processed.
7950 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7951 DeferredInfo;
7952
7953 for (const auto *C :
7954 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7955 for (const auto L : C->component_lists()) {
7956 assert(!L.second.empty() && "Not expecting empty list of components!");
7957 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7958 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7959 const Expr *IE = L.second.back().getAssociatedExpression();
7960 // If the first component is a member expression, we have to look into
7961 // 'this', which maps to null in the map of map information. Otherwise
7962 // look directly for the information.
7963 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7964
7965 // We potentially have map information for this declaration already.
7966 // Look for the first set of components that refer to it.
7967 if (It != Info.end()) {
7968 auto CI = std::find_if(
7969 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7970 return MI.Components.back().getAssociatedDeclaration() == VD;
7971 });
7972 // If we found a map entry, signal that the pointer has to be returned
7973 // and move on to the next declaration.
7974 if (CI != It->second.end()) {
7975 CI->ReturnDevicePointer = true;
7976 continue;
7977 }
7978 }
7979
7980 // We didn't find any match in our map information - generate a zero
7981 // size array section - if the pointer is a struct member we defer this
7982 // action until the whole struct has been processed.
7983 if (isa<MemberExpr>(IE)) {
7984 // Insert the pointer into Info to be processed by
7985 // generateInfoForComponentList. Because it is a member pointer
7986 // without a pointee, no entry will be generated for it, therefore
7987 // we need to generate one after the whole struct has been processed.
7988 // Nonetheless, generateInfoForComponentList must be called to take
7989 // the pointer into account for the calculation of the range of the
7990 // partial struct.
7991 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7992 /*ReturnDevicePointer=*/false, C->isImplicit());
7993 DeferredInfo[nullptr].emplace_back(IE, VD);
7994 } else {
7995 llvm::Value *Ptr =
7996 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7997 BasePointers.emplace_back(Ptr, VD);
7998 Pointers.push_back(Ptr);
7999 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8000 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8001 }
8002 }
8003 }
8004
8005 for (const auto &M : Info) {
8006 // We need to know when we generate information for the first component
8007 // associated with a capture, because the mapping flags depend on it.
8008 bool IsFirstComponentList = true;
8009
8010 // Temporary versions of arrays
8011 MapBaseValuesArrayTy CurBasePointers;
8012 MapValuesArrayTy CurPointers;
8013 MapValuesArrayTy CurSizes;
8014 MapFlagsArrayTy CurTypes;
8015 StructRangeInfoTy PartialStruct;
8016
8017 for (const MapInfo &L : M.second) {
8018 assert(!L.Components.empty() &&
8019 "Not expecting declaration with no component lists.");
8020
8021 // Remember the current base pointer index.
8022 unsigned CurrentBasePointersIdx = CurBasePointers.size();
8023 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8024 CurBasePointers, CurPointers, CurSizes,
8025 CurTypes, PartialStruct,
8026 IsFirstComponentList, L.IsImplicit);
8027
8028 // If this entry relates with a device pointer, set the relevant
8029 // declaration and add the 'return pointer' flag.
8030 if (L.ReturnDevicePointer) {
8031 assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8032 "Unexpected number of mapped base pointers.");
8033
8034 const ValueDecl *RelevantVD =
8035 L.Components.back().getAssociatedDeclaration();
8036 assert(RelevantVD &&
8037 "No relevant declaration related with device pointer??");
8038
8039 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8040 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8041 }
8042 IsFirstComponentList = false;
8043 }
8044
8045 // Append any pending zero-length pointers which are struct members and
8046 // used with use_device_ptr.
8047 auto CI = DeferredInfo.find(M.first);
8048 if (CI != DeferredInfo.end()) {
8049 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8050 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8051 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8052 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8053 CurBasePointers.emplace_back(BasePtr, L.VD);
8054 CurPointers.push_back(Ptr);
8055 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8056 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8057 // value MEMBER_OF=FFFF so that the entry is later updated with the
8058 // correct value of MEMBER_OF.
8059 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8060 OMP_MAP_MEMBER_OF);
8061 }
8062 }
8063
8064 // If there is an entry in PartialStruct it means we have a struct with
8065 // individual members mapped. Emit an extra combined entry.
8066 if (PartialStruct.Base.isValid())
8067 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8068 PartialStruct);
8069
8070 // We need to append the results of this capture to what we already have.
8071 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8072 Pointers.append(CurPointers.begin(), CurPointers.end());
8073 Sizes.append(CurSizes.begin(), CurSizes.end());
8074 Types.append(CurTypes.begin(), CurTypes.end());
8075 }
8076 }
8077
8078 /// Generate all the base pointers, section pointers, sizes and map types for
8079 /// the extracted map clauses of user-defined mapper.
generateAllInfoForMapper(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const8080 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8081 MapValuesArrayTy &Pointers,
8082 MapValuesArrayTy &Sizes,
8083 MapFlagsArrayTy &Types) const {
8084 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8085 "Expect a declare mapper directive");
8086 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8087 // We have to process the component lists that relate with the same
8088 // declaration in a single chunk so that we can generate the map flags
8089 // correctly. Therefore, we organize all lists in a map.
8090 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8091
8092 // Helper function to fill the information map for the different supported
8093 // clauses.
8094 auto &&InfoGen = [&Info](
8095 const ValueDecl *D,
8096 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8097 OpenMPMapClauseKind MapType,
8098 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8099 bool ReturnDevicePointer, bool IsImplicit) {
8100 const ValueDecl *VD =
8101 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8102 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8103 IsImplicit);
8104 };
8105
8106 for (const auto *C : CurMapperDir->clauselists()) {
8107 const auto *MC = cast<OMPMapClause>(C);
8108 for (const auto L : MC->component_lists()) {
8109 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8110 /*ReturnDevicePointer=*/false, MC->isImplicit());
8111 }
8112 }
8113
8114 for (const auto &M : Info) {
8115 // We need to know when we generate information for the first component
8116 // associated with a capture, because the mapping flags depend on it.
8117 bool IsFirstComponentList = true;
8118
8119 // Temporary versions of arrays
8120 MapBaseValuesArrayTy CurBasePointers;
8121 MapValuesArrayTy CurPointers;
8122 MapValuesArrayTy CurSizes;
8123 MapFlagsArrayTy CurTypes;
8124 StructRangeInfoTy PartialStruct;
8125
8126 for (const MapInfo &L : M.second) {
8127 assert(!L.Components.empty() &&
8128 "Not expecting declaration with no component lists.");
8129 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8130 CurBasePointers, CurPointers, CurSizes,
8131 CurTypes, PartialStruct,
8132 IsFirstComponentList, L.IsImplicit);
8133 IsFirstComponentList = false;
8134 }
8135
8136 // If there is an entry in PartialStruct it means we have a struct with
8137 // individual members mapped. Emit an extra combined entry.
8138 if (PartialStruct.Base.isValid())
8139 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8140 PartialStruct);
8141
8142 // We need to append the results of this capture to what we already have.
8143 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8144 Pointers.append(CurPointers.begin(), CurPointers.end());
8145 Sizes.append(CurSizes.begin(), CurSizes.end());
8146 Types.append(CurTypes.begin(), CurTypes.end());
8147 }
8148 }
8149
8150 /// Emit capture info for lambdas for variables captured by reference.
generateInfoForLambdaCaptures(const ValueDecl * VD,llvm::Value * Arg,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers) const8151 void generateInfoForLambdaCaptures(
8152 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8153 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8154 MapFlagsArrayTy &Types,
8155 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8156 const auto *RD = VD->getType()
8157 .getCanonicalType()
8158 .getNonReferenceType()
8159 ->getAsCXXRecordDecl();
8160 if (!RD || !RD->isLambda())
8161 return;
8162 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8163 LValue VDLVal = CGF.MakeAddrLValue(
8164 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8165 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8166 FieldDecl *ThisCapture = nullptr;
8167 RD->getCaptureFields(Captures, ThisCapture);
8168 if (ThisCapture) {
8169 LValue ThisLVal =
8170 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8171 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8172 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8173 VDLVal.getPointer(CGF));
8174 BasePointers.push_back(ThisLVal.getPointer(CGF));
8175 Pointers.push_back(ThisLValVal.getPointer(CGF));
8176 Sizes.push_back(
8177 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8178 CGF.Int64Ty, /*isSigned=*/true));
8179 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8180 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8181 }
8182 for (const LambdaCapture &LC : RD->captures()) {
8183 if (!LC.capturesVariable())
8184 continue;
8185 const VarDecl *VD = LC.getCapturedVar();
8186 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8187 continue;
8188 auto It = Captures.find(VD);
8189 assert(It != Captures.end() && "Found lambda capture without field.");
8190 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8191 if (LC.getCaptureKind() == LCK_ByRef) {
8192 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8193 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8194 VDLVal.getPointer(CGF));
8195 BasePointers.push_back(VarLVal.getPointer(CGF));
8196 Pointers.push_back(VarLValVal.getPointer(CGF));
8197 Sizes.push_back(CGF.Builder.CreateIntCast(
8198 CGF.getTypeSize(
8199 VD->getType().getCanonicalType().getNonReferenceType()),
8200 CGF.Int64Ty, /*isSigned=*/true));
8201 } else {
8202 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8203 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8204 VDLVal.getPointer(CGF));
8205 BasePointers.push_back(VarLVal.getPointer(CGF));
8206 Pointers.push_back(VarRVal.getScalarVal());
8207 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8208 }
8209 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8210 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8211 }
8212 }
8213
8214 /// Set correct indices for lambdas captures.
adjustMemberOfForLambdaCaptures(const llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapFlagsArrayTy & Types) const8215 void adjustMemberOfForLambdaCaptures(
8216 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8217 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8218 MapFlagsArrayTy &Types) const {
8219 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8220 // Set correct member_of idx for all implicit lambda captures.
8221 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8222 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8223 continue;
8224 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8225 assert(BasePtr && "Unable to find base lambda address.");
8226 int TgtIdx = -1;
8227 for (unsigned J = I; J > 0; --J) {
8228 unsigned Idx = J - 1;
8229 if (Pointers[Idx] != BasePtr)
8230 continue;
8231 TgtIdx = Idx;
8232 break;
8233 }
8234 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8235 // All other current entries will be MEMBER_OF the combined entry
8236 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8237 // 0xFFFF in the MEMBER_OF field).
8238 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8239 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8240 }
8241 }
8242
8243 /// Generate the base pointers, section pointers, sizes and map types
8244 /// associated to a given capture.
generateInfoForCapture(const CapturedStmt::Capture * Cap,llvm::Value * Arg,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,StructRangeInfoTy & PartialStruct) const8245 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8246 llvm::Value *Arg,
8247 MapBaseValuesArrayTy &BasePointers,
8248 MapValuesArrayTy &Pointers,
8249 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8250 StructRangeInfoTy &PartialStruct) const {
8251 assert(!Cap->capturesVariableArrayType() &&
8252 "Not expecting to generate map info for a variable array type!");
8253
8254 // We need to know when we generating information for the first component
8255 const ValueDecl *VD = Cap->capturesThis()
8256 ? nullptr
8257 : Cap->getCapturedVar()->getCanonicalDecl();
8258
8259 // If this declaration appears in a is_device_ptr clause we just have to
8260 // pass the pointer by value. If it is a reference to a declaration, we just
8261 // pass its value.
8262 if (DevPointersMap.count(VD)) {
8263 BasePointers.emplace_back(Arg, VD);
8264 Pointers.push_back(Arg);
8265 Sizes.push_back(
8266 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8267 CGF.Int64Ty, /*isSigned=*/true));
8268 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8269 return;
8270 }
8271
8272 using MapData =
8273 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8274 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8275 SmallVector<MapData, 4> DeclComponentLists;
8276 assert(CurDir.is<const OMPExecutableDirective *>() &&
8277 "Expect a executable directive");
8278 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8279 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8280 for (const auto L : C->decl_component_lists(VD)) {
8281 assert(L.first == VD &&
8282 "We got information for the wrong declaration??");
8283 assert(!L.second.empty() &&
8284 "Not expecting declaration with no component lists.");
8285 DeclComponentLists.emplace_back(L.second, C->getMapType(),
8286 C->getMapTypeModifiers(),
8287 C->isImplicit());
8288 }
8289 }
8290
8291 // Find overlapping elements (including the offset from the base element).
8292 llvm::SmallDenseMap<
8293 const MapData *,
8294 llvm::SmallVector<
8295 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8296 4>
8297 OverlappedData;
8298 size_t Count = 0;
8299 for (const MapData &L : DeclComponentLists) {
8300 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8301 OpenMPMapClauseKind MapType;
8302 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8303 bool IsImplicit;
8304 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8305 ++Count;
8306 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8307 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8308 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8309 auto CI = Components.rbegin();
8310 auto CE = Components.rend();
8311 auto SI = Components1.rbegin();
8312 auto SE = Components1.rend();
8313 for (; CI != CE && SI != SE; ++CI, ++SI) {
8314 if (CI->getAssociatedExpression()->getStmtClass() !=
8315 SI->getAssociatedExpression()->getStmtClass())
8316 break;
8317 // Are we dealing with different variables/fields?
8318 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8319 break;
8320 }
8321 // Found overlapping if, at least for one component, reached the head of
8322 // the components list.
8323 if (CI == CE || SI == SE) {
8324 assert((CI != CE || SI != SE) &&
8325 "Unexpected full match of the mapping components.");
8326 const MapData &BaseData = CI == CE ? L : L1;
8327 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8328 SI == SE ? Components : Components1;
8329 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8330 OverlappedElements.getSecond().push_back(SubData);
8331 }
8332 }
8333 }
8334 // Sort the overlapped elements for each item.
8335 llvm::SmallVector<const FieldDecl *, 4> Layout;
8336 if (!OverlappedData.empty()) {
8337 if (const auto *CRD =
8338 VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8339 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8340 else {
8341 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8342 Layout.append(RD->field_begin(), RD->field_end());
8343 }
8344 }
8345 for (auto &Pair : OverlappedData) {
8346 llvm::sort(
8347 Pair.getSecond(),
8348 [&Layout](
8349 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8350 OMPClauseMappableExprCommon::MappableExprComponentListRef
8351 Second) {
8352 auto CI = First.rbegin();
8353 auto CE = First.rend();
8354 auto SI = Second.rbegin();
8355 auto SE = Second.rend();
8356 for (; CI != CE && SI != SE; ++CI, ++SI) {
8357 if (CI->getAssociatedExpression()->getStmtClass() !=
8358 SI->getAssociatedExpression()->getStmtClass())
8359 break;
8360 // Are we dealing with different variables/fields?
8361 if (CI->getAssociatedDeclaration() !=
8362 SI->getAssociatedDeclaration())
8363 break;
8364 }
8365
8366 // Lists contain the same elements.
8367 if (CI == CE && SI == SE)
8368 return false;
8369
8370 // List with less elements is less than list with more elements.
8371 if (CI == CE || SI == SE)
8372 return CI == CE;
8373
8374 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8375 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8376 if (FD1->getParent() == FD2->getParent())
8377 return FD1->getFieldIndex() < FD2->getFieldIndex();
8378 const auto It =
8379 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8380 return FD == FD1 || FD == FD2;
8381 });
8382 return *It == FD1;
8383 });
8384 }
8385
8386 // Associated with a capture, because the mapping flags depend on it.
8387 // Go through all of the elements with the overlapped elements.
8388 for (const auto &Pair : OverlappedData) {
8389 const MapData &L = *Pair.getFirst();
8390 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8391 OpenMPMapClauseKind MapType;
8392 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8393 bool IsImplicit;
8394 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8395 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8396 OverlappedComponents = Pair.getSecond();
8397 bool IsFirstComponentList = true;
8398 generateInfoForComponentList(MapType, MapModifiers, Components,
8399 BasePointers, Pointers, Sizes, Types,
8400 PartialStruct, IsFirstComponentList,
8401 IsImplicit, OverlappedComponents);
8402 }
8403 // Go through other elements without overlapped elements.
8404 bool IsFirstComponentList = OverlappedData.empty();
8405 for (const MapData &L : DeclComponentLists) {
8406 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8407 OpenMPMapClauseKind MapType;
8408 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8409 bool IsImplicit;
8410 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8411 auto It = OverlappedData.find(&L);
8412 if (It == OverlappedData.end())
8413 generateInfoForComponentList(MapType, MapModifiers, Components,
8414 BasePointers, Pointers, Sizes, Types,
8415 PartialStruct, IsFirstComponentList,
8416 IsImplicit);
8417 IsFirstComponentList = false;
8418 }
8419 }
8420
8421 /// Generate the base pointers, section pointers, sizes and map types
8422 /// associated with the declare target link variables.
generateInfoForDeclareTargetLink(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const8423 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8424 MapValuesArrayTy &Pointers,
8425 MapValuesArrayTy &Sizes,
8426 MapFlagsArrayTy &Types) const {
8427 assert(CurDir.is<const OMPExecutableDirective *>() &&
8428 "Expect a executable directive");
8429 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8430 // Map other list items in the map clause which are not captured variables
8431 // but "declare target link" global variables.
8432 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8433 for (const auto L : C->component_lists()) {
8434 if (!L.first)
8435 continue;
8436 const auto *VD = dyn_cast<VarDecl>(L.first);
8437 if (!VD)
8438 continue;
8439 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8440 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8441 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8442 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8443 continue;
8444 StructRangeInfoTy PartialStruct;
8445 generateInfoForComponentList(
8446 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8447 Pointers, Sizes, Types, PartialStruct,
8448 /*IsFirstComponentList=*/true, C->isImplicit());
8449 assert(!PartialStruct.Base.isValid() &&
8450 "No partial structs for declare target link expected.");
8451 }
8452 }
8453 }
8454
8455 /// Generate the default map information for a given capture \a CI,
8456 /// record field declaration \a RI and captured value \a CV.
generateDefaultMapInfo(const CapturedStmt::Capture & CI,const FieldDecl & RI,llvm::Value * CV,MapBaseValuesArrayTy & CurBasePointers,MapValuesArrayTy & CurPointers,MapValuesArrayTy & CurSizes,MapFlagsArrayTy & CurMapTypes) const8457 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8458 const FieldDecl &RI, llvm::Value *CV,
8459 MapBaseValuesArrayTy &CurBasePointers,
8460 MapValuesArrayTy &CurPointers,
8461 MapValuesArrayTy &CurSizes,
8462 MapFlagsArrayTy &CurMapTypes) const {
8463 bool IsImplicit = true;
8464 // Do the default mapping.
8465 if (CI.capturesThis()) {
8466 CurBasePointers.push_back(CV);
8467 CurPointers.push_back(CV);
8468 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8469 CurSizes.push_back(
8470 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8471 CGF.Int64Ty, /*isSigned=*/true));
8472 // Default map type.
8473 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8474 } else if (CI.capturesVariableByCopy()) {
8475 CurBasePointers.push_back(CV);
8476 CurPointers.push_back(CV);
8477 if (!RI.getType()->isAnyPointerType()) {
8478 // We have to signal to the runtime captures passed by value that are
8479 // not pointers.
8480 CurMapTypes.push_back(OMP_MAP_LITERAL);
8481 CurSizes.push_back(CGF.Builder.CreateIntCast(
8482 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8483 } else {
8484 // Pointers are implicitly mapped with a zero size and no flags
8485 // (other than first map that is added for all implicit maps).
8486 CurMapTypes.push_back(OMP_MAP_NONE);
8487 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8488 }
8489 const VarDecl *VD = CI.getCapturedVar();
8490 auto I = FirstPrivateDecls.find(VD);
8491 if (I != FirstPrivateDecls.end())
8492 IsImplicit = I->getSecond();
8493 } else {
8494 assert(CI.capturesVariable() && "Expected captured reference.");
8495 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8496 QualType ElementType = PtrTy->getPointeeType();
8497 CurSizes.push_back(CGF.Builder.CreateIntCast(
8498 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8499 // The default map type for a scalar/complex type is 'to' because by
8500 // default the value doesn't have to be retrieved. For an aggregate
8501 // type, the default is 'tofrom'.
8502 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8503 const VarDecl *VD = CI.getCapturedVar();
8504 auto I = FirstPrivateDecls.find(VD);
8505 if (I != FirstPrivateDecls.end() &&
8506 VD->getType().isConstant(CGF.getContext())) {
8507 llvm::Constant *Addr =
8508 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8509 // Copy the value of the original variable to the new global copy.
8510 CGF.Builder.CreateMemCpy(
8511 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8512 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8513 CurSizes.back(), /*IsVolatile=*/false);
8514 // Use new global variable as the base pointers.
8515 CurBasePointers.push_back(Addr);
8516 CurPointers.push_back(Addr);
8517 } else {
8518 CurBasePointers.push_back(CV);
8519 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8520 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8521 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8522 AlignmentSource::Decl));
8523 CurPointers.push_back(PtrAddr.getPointer());
8524 } else {
8525 CurPointers.push_back(CV);
8526 }
8527 }
8528 if (I != FirstPrivateDecls.end())
8529 IsImplicit = I->getSecond();
8530 }
8531 // Every default map produces a single argument which is a target parameter.
8532 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8533
8534 // Add flag stating this is an implicit map.
8535 if (IsImplicit)
8536 CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8537 }
8538 };
8539 } // anonymous namespace
8540
8541 /// Emit the arrays used to pass the captures and map information to the
8542 /// offloading runtime library. If there is no map or capture information,
8543 /// return nullptr by reference.
8544 static void
emitOffloadingArrays(CodeGenFunction & CGF,MappableExprsHandler::MapBaseValuesArrayTy & BasePointers,MappableExprsHandler::MapValuesArrayTy & Pointers,MappableExprsHandler::MapValuesArrayTy & Sizes,MappableExprsHandler::MapFlagsArrayTy & MapTypes,CGOpenMPRuntime::TargetDataInfo & Info)8545 emitOffloadingArrays(CodeGenFunction &CGF,
8546 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8547 MappableExprsHandler::MapValuesArrayTy &Pointers,
8548 MappableExprsHandler::MapValuesArrayTy &Sizes,
8549 MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8550 CGOpenMPRuntime::TargetDataInfo &Info) {
8551 CodeGenModule &CGM = CGF.CGM;
8552 ASTContext &Ctx = CGF.getContext();
8553
8554 // Reset the array information.
8555 Info.clearArrayInfo();
8556 Info.NumberOfPtrs = BasePointers.size();
8557
8558 if (Info.NumberOfPtrs) {
8559 // Detect if we have any capture size requiring runtime evaluation of the
8560 // size so that a constant array could be eventually used.
8561 bool hasRuntimeEvaluationCaptureSize = false;
8562 for (llvm::Value *S : Sizes)
8563 if (!isa<llvm::Constant>(S)) {
8564 hasRuntimeEvaluationCaptureSize = true;
8565 break;
8566 }
8567
8568 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8569 QualType PointerArrayType = Ctx.getConstantArrayType(
8570 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8571 /*IndexTypeQuals=*/0);
8572
8573 Info.BasePointersArray =
8574 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8575 Info.PointersArray =
8576 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8577
8578 // If we don't have any VLA types or other types that require runtime
8579 // evaluation, we can use a constant array for the map sizes, otherwise we
8580 // need to fill up the arrays as we do for the pointers.
8581 QualType Int64Ty =
8582 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8583 if (hasRuntimeEvaluationCaptureSize) {
8584 QualType SizeArrayType = Ctx.getConstantArrayType(
8585 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8586 /*IndexTypeQuals=*/0);
8587 Info.SizesArray =
8588 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8589 } else {
8590 // We expect all the sizes to be constant, so we collect them to create
8591 // a constant array.
8592 SmallVector<llvm::Constant *, 16> ConstSizes;
8593 for (llvm::Value *S : Sizes)
8594 ConstSizes.push_back(cast<llvm::Constant>(S));
8595
8596 auto *SizesArrayInit = llvm::ConstantArray::get(
8597 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8598 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8599 auto *SizesArrayGbl = new llvm::GlobalVariable(
8600 CGM.getModule(), SizesArrayInit->getType(),
8601 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8602 SizesArrayInit, Name);
8603 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8604 Info.SizesArray = SizesArrayGbl;
8605 }
8606
8607 // The map types are always constant so we don't need to generate code to
8608 // fill arrays. Instead, we create an array constant.
8609 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8610 llvm::copy(MapTypes, Mapping.begin());
8611 llvm::Constant *MapTypesArrayInit =
8612 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8613 std::string MaptypesName =
8614 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8615 auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8616 CGM.getModule(), MapTypesArrayInit->getType(),
8617 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8618 MapTypesArrayInit, MaptypesName);
8619 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8620 Info.MapTypesArray = MapTypesArrayGbl;
8621
8622 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8623 llvm::Value *BPVal = *BasePointers[I];
8624 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8625 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8626 Info.BasePointersArray, 0, I);
8627 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8628 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8629 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8630 CGF.Builder.CreateStore(BPVal, BPAddr);
8631
8632 if (Info.requiresDevicePointerInfo())
8633 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8634 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8635
8636 llvm::Value *PVal = Pointers[I];
8637 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8638 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8639 Info.PointersArray, 0, I);
8640 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8641 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8642 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8643 CGF.Builder.CreateStore(PVal, PAddr);
8644
8645 if (hasRuntimeEvaluationCaptureSize) {
8646 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8647 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8648 Info.SizesArray,
8649 /*Idx0=*/0,
8650 /*Idx1=*/I);
8651 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8652 CGF.Builder.CreateStore(
8653 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8654 SAddr);
8655 }
8656 }
8657 }
8658 }
8659
8660 /// Emit the arguments to be passed to the runtime library based on the
8661 /// arrays of pointers, sizes and map types.
emitOffloadingArraysArgument(CodeGenFunction & CGF,llvm::Value * & BasePointersArrayArg,llvm::Value * & PointersArrayArg,llvm::Value * & SizesArrayArg,llvm::Value * & MapTypesArrayArg,CGOpenMPRuntime::TargetDataInfo & Info)8662 static void emitOffloadingArraysArgument(
8663 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8664 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8665 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8666 CodeGenModule &CGM = CGF.CGM;
8667 if (Info.NumberOfPtrs) {
8668 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8669 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8670 Info.BasePointersArray,
8671 /*Idx0=*/0, /*Idx1=*/0);
8672 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8673 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8674 Info.PointersArray,
8675 /*Idx0=*/0,
8676 /*Idx1=*/0);
8677 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8678 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8679 /*Idx0=*/0, /*Idx1=*/0);
8680 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8681 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8682 Info.MapTypesArray,
8683 /*Idx0=*/0,
8684 /*Idx1=*/0);
8685 } else {
8686 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8687 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8688 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8689 MapTypesArrayArg =
8690 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8691 }
8692 }
8693
8694 /// Check for inner distribute directive.
8695 static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext & Ctx,const OMPExecutableDirective & D)8696 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8697 const auto *CS = D.getInnermostCapturedStmt();
8698 const auto *Body =
8699 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8700 const Stmt *ChildStmt =
8701 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8702
8703 if (const auto *NestedDir =
8704 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8705 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8706 switch (D.getDirectiveKind()) {
8707 case OMPD_target:
8708 if (isOpenMPDistributeDirective(DKind))
8709 return NestedDir;
8710 if (DKind == OMPD_teams) {
8711 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8712 /*IgnoreCaptured=*/true);
8713 if (!Body)
8714 return nullptr;
8715 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8716 if (const auto *NND =
8717 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8718 DKind = NND->getDirectiveKind();
8719 if (isOpenMPDistributeDirective(DKind))
8720 return NND;
8721 }
8722 }
8723 return nullptr;
8724 case OMPD_target_teams:
8725 if (isOpenMPDistributeDirective(DKind))
8726 return NestedDir;
8727 return nullptr;
8728 case OMPD_target_parallel:
8729 case OMPD_target_simd:
8730 case OMPD_target_parallel_for:
8731 case OMPD_target_parallel_for_simd:
8732 return nullptr;
8733 case OMPD_target_teams_distribute:
8734 case OMPD_target_teams_distribute_simd:
8735 case OMPD_target_teams_distribute_parallel_for:
8736 case OMPD_target_teams_distribute_parallel_for_simd:
8737 case OMPD_parallel:
8738 case OMPD_for:
8739 case OMPD_parallel_for:
8740 case OMPD_parallel_master:
8741 case OMPD_parallel_sections:
8742 case OMPD_for_simd:
8743 case OMPD_parallel_for_simd:
8744 case OMPD_cancel:
8745 case OMPD_cancellation_point:
8746 case OMPD_ordered:
8747 case OMPD_threadprivate:
8748 case OMPD_allocate:
8749 case OMPD_task:
8750 case OMPD_simd:
8751 case OMPD_sections:
8752 case OMPD_section:
8753 case OMPD_single:
8754 case OMPD_master:
8755 case OMPD_critical:
8756 case OMPD_taskyield:
8757 case OMPD_barrier:
8758 case OMPD_taskwait:
8759 case OMPD_taskgroup:
8760 case OMPD_atomic:
8761 case OMPD_flush:
8762 case OMPD_teams:
8763 case OMPD_target_data:
8764 case OMPD_target_exit_data:
8765 case OMPD_target_enter_data:
8766 case OMPD_distribute:
8767 case OMPD_distribute_simd:
8768 case OMPD_distribute_parallel_for:
8769 case OMPD_distribute_parallel_for_simd:
8770 case OMPD_teams_distribute:
8771 case OMPD_teams_distribute_simd:
8772 case OMPD_teams_distribute_parallel_for:
8773 case OMPD_teams_distribute_parallel_for_simd:
8774 case OMPD_target_update:
8775 case OMPD_declare_simd:
8776 case OMPD_declare_variant:
8777 case OMPD_declare_target:
8778 case OMPD_end_declare_target:
8779 case OMPD_declare_reduction:
8780 case OMPD_declare_mapper:
8781 case OMPD_taskloop:
8782 case OMPD_taskloop_simd:
8783 case OMPD_master_taskloop:
8784 case OMPD_master_taskloop_simd:
8785 case OMPD_parallel_master_taskloop:
8786 case OMPD_parallel_master_taskloop_simd:
8787 case OMPD_requires:
8788 case OMPD_unknown:
8789 llvm_unreachable("Unexpected directive.");
8790 }
8791 }
8792
8793 return nullptr;
8794 }
8795
8796 /// Emit the user-defined mapper function. The code generation follows the
8797 /// pattern in the example below.
8798 /// \code
8799 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8800 /// void *base, void *begin,
8801 /// int64_t size, int64_t type) {
8802 /// // Allocate space for an array section first.
8803 /// if (size > 1 && !maptype.IsDelete)
8804 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8805 /// size*sizeof(Ty), clearToFrom(type));
8806 /// // Map members.
8807 /// for (unsigned i = 0; i < size; i++) {
8808 /// // For each component specified by this mapper:
8809 /// for (auto c : all_components) {
8810 /// if (c.hasMapper())
8811 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8812 /// c.arg_type);
8813 /// else
8814 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8815 /// c.arg_begin, c.arg_size, c.arg_type);
8816 /// }
8817 /// }
8818 /// // Delete the array section.
8819 /// if (size > 1 && maptype.IsDelete)
8820 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8821 /// size*sizeof(Ty), clearToFrom(type));
8822 /// }
8823 /// \endcode
emitUserDefinedMapper(const OMPDeclareMapperDecl * D,CodeGenFunction * CGF)8824 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8825 CodeGenFunction *CGF) {
8826 if (UDMMap.count(D) > 0)
8827 return;
8828 ASTContext &C = CGM.getContext();
8829 QualType Ty = D->getType();
8830 QualType PtrTy = C.getPointerType(Ty).withRestrict();
8831 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8832 auto *MapperVarDecl =
8833 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8834 SourceLocation Loc = D->getLocation();
8835 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8836
8837 // Prepare mapper function arguments and attributes.
8838 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8839 C.VoidPtrTy, ImplicitParamDecl::Other);
8840 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8841 ImplicitParamDecl::Other);
8842 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8843 C.VoidPtrTy, ImplicitParamDecl::Other);
8844 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8845 ImplicitParamDecl::Other);
8846 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8847 ImplicitParamDecl::Other);
8848 FunctionArgList Args;
8849 Args.push_back(&HandleArg);
8850 Args.push_back(&BaseArg);
8851 Args.push_back(&BeginArg);
8852 Args.push_back(&SizeArg);
8853 Args.push_back(&TypeArg);
8854 const CGFunctionInfo &FnInfo =
8855 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8856 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8857 SmallString<64> TyStr;
8858 llvm::raw_svector_ostream Out(TyStr);
8859 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8860 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8861 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8862 Name, &CGM.getModule());
8863 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8864 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8865 // Start the mapper function code generation.
8866 CodeGenFunction MapperCGF(CGM);
8867 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8868 // Compute the starting and end addreses of array elements.
8869 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8870 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8871 C.getPointerType(Int64Ty), Loc);
8872 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8873 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8874 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8875 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8876 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8877 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8878 C.getPointerType(Int64Ty), Loc);
8879 // Prepare common arguments for array initiation and deletion.
8880 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8881 MapperCGF.GetAddrOfLocalVar(&HandleArg),
8882 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8883 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8884 MapperCGF.GetAddrOfLocalVar(&BaseArg),
8885 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8886 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8887 MapperCGF.GetAddrOfLocalVar(&BeginArg),
8888 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8889
8890 // Emit array initiation if this is an array section and \p MapType indicates
8891 // that memory allocation is required.
8892 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8893 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8894 ElementSize, HeadBB, /*IsInit=*/true);
8895
8896 // Emit a for loop to iterate through SizeArg of elements and map all of them.
8897
8898 // Emit the loop header block.
8899 MapperCGF.EmitBlock(HeadBB);
8900 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8901 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8902 // Evaluate whether the initial condition is satisfied.
8903 llvm::Value *IsEmpty =
8904 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8905 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8906 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8907
8908 // Emit the loop body block.
8909 MapperCGF.EmitBlock(BodyBB);
8910 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8911 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8912 PtrPHI->addIncoming(PtrBegin, EntryBB);
8913 Address PtrCurrent =
8914 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8915 .getAlignment()
8916 .alignmentOfArrayElement(ElementSize));
8917 // Privatize the declared variable of mapper to be the current array element.
8918 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8919 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8920 return MapperCGF
8921 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8922 .getAddress(MapperCGF);
8923 });
8924 (void)Scope.Privatize();
8925
8926 // Get map clause information. Fill up the arrays with all mapped variables.
8927 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8928 MappableExprsHandler::MapValuesArrayTy Pointers;
8929 MappableExprsHandler::MapValuesArrayTy Sizes;
8930 MappableExprsHandler::MapFlagsArrayTy MapTypes;
8931 MappableExprsHandler MEHandler(*D, MapperCGF);
8932 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8933
8934 // Call the runtime API __tgt_mapper_num_components to get the number of
8935 // pre-existing components.
8936 llvm::Value *OffloadingArgs[] = {Handle};
8937 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8938 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8939 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8940 PreviousSize,
8941 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8942
8943 // Fill up the runtime mapper handle for all components.
8944 for (unsigned I = 0; I < BasePointers.size(); ++I) {
8945 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8946 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8947 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8948 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8949 llvm::Value *CurSizeArg = Sizes[I];
8950
8951 // Extract the MEMBER_OF field from the map type.
8952 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8953 MapperCGF.EmitBlock(MemberBB);
8954 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8955 llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8956 OriMapType,
8957 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8958 llvm::BasicBlock *MemberCombineBB =
8959 MapperCGF.createBasicBlock("omp.member.combine");
8960 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8961 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8962 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8963 // Add the number of pre-existing components to the MEMBER_OF field if it
8964 // is valid.
8965 MapperCGF.EmitBlock(MemberCombineBB);
8966 llvm::Value *CombinedMember =
8967 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8968 // Do nothing if it is not a member of previous components.
8969 MapperCGF.EmitBlock(TypeBB);
8970 llvm::PHINode *MemberMapType =
8971 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
8972 MemberMapType->addIncoming(OriMapType, MemberBB);
8973 MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
8974
8975 // Combine the map type inherited from user-defined mapper with that
8976 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
8977 // bits of the \a MapType, which is the input argument of the mapper
8978 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
8979 // bits of MemberMapType.
8980 // [OpenMP 5.0], 1.2.6. map-type decay.
8981 // | alloc | to | from | tofrom | release | delete
8982 // ----------------------------------------------------------
8983 // alloc | alloc | alloc | alloc | alloc | release | delete
8984 // to | alloc | to | alloc | to | release | delete
8985 // from | alloc | alloc | from | from | release | delete
8986 // tofrom | alloc | to | from | tofrom | release | delete
8987 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
8988 MapType,
8989 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
8990 MappableExprsHandler::OMP_MAP_FROM));
8991 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
8992 llvm::BasicBlock *AllocElseBB =
8993 MapperCGF.createBasicBlock("omp.type.alloc.else");
8994 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
8995 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
8996 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
8997 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
8998 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
8999 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9000 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9001 MapperCGF.EmitBlock(AllocBB);
9002 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9003 MemberMapType,
9004 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9005 MappableExprsHandler::OMP_MAP_FROM)));
9006 MapperCGF.Builder.CreateBr(EndBB);
9007 MapperCGF.EmitBlock(AllocElseBB);
9008 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9009 LeftToFrom,
9010 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9011 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9012 // In case of to, clear OMP_MAP_FROM.
9013 MapperCGF.EmitBlock(ToBB);
9014 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9015 MemberMapType,
9016 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9017 MapperCGF.Builder.CreateBr(EndBB);
9018 MapperCGF.EmitBlock(ToElseBB);
9019 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9020 LeftToFrom,
9021 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9022 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9023 // In case of from, clear OMP_MAP_TO.
9024 MapperCGF.EmitBlock(FromBB);
9025 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9026 MemberMapType,
9027 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9028 // In case of tofrom, do nothing.
9029 MapperCGF.EmitBlock(EndBB);
9030 llvm::PHINode *CurMapType =
9031 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9032 CurMapType->addIncoming(AllocMapType, AllocBB);
9033 CurMapType->addIncoming(ToMapType, ToBB);
9034 CurMapType->addIncoming(FromMapType, FromBB);
9035 CurMapType->addIncoming(MemberMapType, ToElseBB);
9036
9037 // TODO: call the corresponding mapper function if a user-defined mapper is
9038 // associated with this map clause.
9039 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9040 // data structure.
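// For reference (recalled from the libomptarget interface of this era, so
// treat as approximate), the entry point is declared as:
//   void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
//                                    void *begin, int64_t size, int64_t type);
// which matches the five arguments assembled below.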
9041 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9042 CurSizeArg, CurMapType};
9043 MapperCGF.EmitRuntimeCall(
9044 createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9045 OffloadingArgs);
9046 }
9047
9048 // Update the pointer to point to the next element that needs to be mapped,
9049 // and check whether we have mapped all elements.
9050 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9051 PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9052 PtrPHI->addIncoming(PtrNext, BodyBB);
9053 llvm::Value *IsDone =
9054 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9055 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9056 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9057
9058 MapperCGF.EmitBlock(ExitBB);
9059 // Emit array deletion if this is an array section and \p MapType indicates
9060 // that deletion is required.
9061 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9062 ElementSize, DoneBB, /*IsInit=*/false);
9063
9064 // Emit the function exit block.
9065 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9066 MapperCGF.FinishFunction();
9067 UDMMap.try_emplace(D, Fn);
9068 if (CGF) {
9069 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9070 Decls.second.push_back(D);
9071 }
9072 }
9073
9074 /// Emit the array initialization or deletion portion for user-defined mapper
9075 /// code generation. First, it evaluates whether an array section is mapped and
9076 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9077 /// true, and \a MapType indicates to not delete this array, array
9078 /// initialization code is generated. If \a IsInit is false, and \a MapType
9079 /// indicates to delete this array, array deletion code is generated.
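/// A rough sketch (illustrative, using the block names created below) of the
/// emitted control flow:
///   if (Size >= 1) {                                 // omp.array*.evaldelete
///     bool Del = MapType & OMP_MAP_DELETE;
///     if (IsInit ? !Del : Del)                       // omp.array* body
///       __tgt_push_mapper_component(Handle, Base, Begin,
///                                   Size * ElementSize,
///                                   MapType & ~(OMP_MAP_TO | OMP_MAP_FROM));
///   }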
9080 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9081 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9082 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9083 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9084 StringRef Prefix = IsInit ? ".init" : ".del";
9085
9086 // Evaluate if this is an array section.
9087 llvm::BasicBlock *IsDeleteBB =
9088 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9089 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9090 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9091 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9092 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9093
9094 // Evaluate if we are going to delete this section.
9095 MapperCGF.EmitBlock(IsDeleteBB);
9096 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9097 MapType,
9098 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9099 llvm::Value *DeleteCond;
9100 if (IsInit) {
9101 DeleteCond = MapperCGF.Builder.CreateIsNull(
9102 DeleteBit, "omp.array" + Prefix + ".delete");
9103 } else {
9104 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9105 DeleteBit, "omp.array" + Prefix + ".delete");
9106 }
9107 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9108
9109 MapperCGF.EmitBlock(BodyBB);
9110 // Get the array size by multiplying element size and element number (i.e., \p
9111 // Size).
9112 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9113 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9114 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it serves
9115 // memory allocation/deletion purposes only.
9116 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9117 MapType,
9118 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9119 MappableExprsHandler::OMP_MAP_FROM)));
9120 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9121 // data structure.
9122 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9123 MapperCGF.EmitRuntimeCall(
9124 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9125 }
9126
9127 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9128 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9129 llvm::Value *DeviceID,
9130 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9131 const OMPLoopDirective &D)>
9132 SizeEmitter) {
9133 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9134 const OMPExecutableDirective *TD = &D;
9135 // Get nested teams distribute kind directive, if any.
9136 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9137 TD = getNestedDistributeDirective(CGM.getContext(), D);
9138 if (!TD)
9139 return;
9140 const auto *LD = cast<OMPLoopDirective>(TD);
9141 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9142 PrePostActionTy &) {
9143 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9144 llvm::Value *Args[] = {DeviceID, NumIterations};
9145 CGF.EmitRuntimeCall(
9146 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9147 }
9148 };
9149 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9150 }
9151
9152 void CGOpenMPRuntime::emitTargetCall(
9153 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9154 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9155 const Expr *Device,
9156 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9157 const OMPLoopDirective &D)>
9158 SizeEmitter) {
9159 if (!CGF.HaveInsertPoint())
9160 return;
9161
9162 assert(OutlinedFn && "Invalid outlined function!");
9163
9164 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9165 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9166 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9167 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9168 PrePostActionTy &) {
9169 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9170 };
9171 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9172
9173 CodeGenFunction::OMPTargetDataInfo InputInfo;
9174 llvm::Value *MapTypesArray = nullptr;
9175 // Fill up the pointer arrays and transfer execution to the device.
9176 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9177 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9178 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9179 // On top of the arrays that were filled up, the target offloading call
9180 // takes as arguments the device id as well as the host pointer. The host
9181 // pointer is used by the runtime library to identify the current target
9182 // region, so it only has to be unique and not necessarily point to
9183 // anything. It could be the pointer to the outlined function that
9184 // implements the target region, but we aren't using that pointer, so the
9185 // compiler does not have to keep it alive and can therefore inline the host
9186 // function if proven worthwhile during optimization.
9187
9188 // From this point on, we need to have an ID of the target region defined.
9189 assert(OutlinedFnID && "Invalid outlined function ID!");
9190
9191 // Emit device ID if any.
9192 llvm::Value *DeviceID;
9193 if (Device) {
9194 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9195 CGF.Int64Ty, /*isSigned=*/true);
9196 } else {
9197 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9198 }
9199
9200 // Emit the number of elements in the offloading arrays.
9201 llvm::Value *PointerNum =
9202 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9203
9204 // Return value of the runtime offloading call.
9205 llvm::Value *Return;
9206
9207 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9208 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9209
9210 // Emit tripcount for the target loop-based directive.
9211 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9212
9213 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9214 // The target region is an outlined function launched by the runtime
9215 // via calls __tgt_target() or __tgt_target_teams().
9216 //
9217 // __tgt_target() launches a target region with one team and one thread,
9218 // executing a serial region. This master thread may in turn launch
9219 // more threads within its team upon encountering a parallel region,
9220 // however, no additional teams can be launched on the device.
9221 //
9222 // __tgt_target_teams() launches a target region with one or more teams,
9223 // each with one or more threads. This call is required for target
9224 // constructs such as:
9225 // 'target teams'
9226 // 'target' / 'teams'
9227 // 'target teams distribute parallel for'
9228 // 'target parallel'
9229 // and so on.
9230 //
9231 // Note that on the host and CPU targets, the runtime implementation of
9232 // these calls simply calls the outlined function without forking threads.
9233 // The outlined functions themselves have runtime calls to
9234 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9235 // the compiler in emitTeamsCall() and emitParallelCall().
9236 //
9237 // In contrast, on the NVPTX target, the implementation of
9238 // __tgt_target_teams() launches a GPU kernel with the requested number
9239 // of teams and threads so no additional calls to the runtime are required.
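// For example (illustrative):
//   #pragma omp target                  --> __tgt_target(...)
//   #pragma omp target parallel         --> __tgt_target_teams(...)
//   #pragma omp target teams distribute --> __tgt_target_teams(...)
// with the corresponding "_nowait" entry points chosen below when a nowait
// clause is present.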
9240 if (NumTeams) {
9241 // If we have NumTeams defined this means that we have an enclosed teams
9242 // region. Therefore we also expect to have NumThreads defined. These two
9243 // values should be defined in the presence of a teams directive,
9244 // regardless of having any clauses associated. If the user is using teams
9245 // but no clauses, these two values will be the default that should be
9246 // passed to the runtime library - a 32-bit integer with the value zero.
9247 assert(NumThreads && "Thread limit expression should be available along "
9248 "with number of teams.");
9249 llvm::Value *OffloadingArgs[] = {DeviceID,
9250 OutlinedFnID,
9251 PointerNum,
9252 InputInfo.BasePointersArray.getPointer(),
9253 InputInfo.PointersArray.getPointer(),
9254 InputInfo.SizesArray.getPointer(),
9255 MapTypesArray,
9256 NumTeams,
9257 NumThreads};
9258 Return = CGF.EmitRuntimeCall(
9259 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9260 : OMPRTL__tgt_target_teams),
9261 OffloadingArgs);
9262 } else {
9263 llvm::Value *OffloadingArgs[] = {DeviceID,
9264 OutlinedFnID,
9265 PointerNum,
9266 InputInfo.BasePointersArray.getPointer(),
9267 InputInfo.PointersArray.getPointer(),
9268 InputInfo.SizesArray.getPointer(),
9269 MapTypesArray};
9270 Return = CGF.EmitRuntimeCall(
9271 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9272 : OMPRTL__tgt_target),
9273 OffloadingArgs);
9274 }
9275
9276 // Check the error code and execute the host version if required.
9277 llvm::BasicBlock *OffloadFailedBlock =
9278 CGF.createBasicBlock("omp_offload.failed");
9279 llvm::BasicBlock *OffloadContBlock =
9280 CGF.createBasicBlock("omp_offload.cont");
9281 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9282 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9283
9284 CGF.EmitBlock(OffloadFailedBlock);
9285 if (RequiresOuterTask) {
9286 CapturedVars.clear();
9287 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9288 }
9289 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9290 CGF.EmitBranch(OffloadContBlock);
9291
9292 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9293 };
9294
9295 // Notify that the host version must be executed.
9296 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9297 RequiresOuterTask](CodeGenFunction &CGF,
9298 PrePostActionTy &) {
9299 if (RequiresOuterTask) {
9300 CapturedVars.clear();
9301 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9302 }
9303 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9304 };
9305
9306 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9307 &CapturedVars, RequiresOuterTask,
9308 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9309 // Fill up the arrays with all the captured variables.
9310 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9311 MappableExprsHandler::MapValuesArrayTy Pointers;
9312 MappableExprsHandler::MapValuesArrayTy Sizes;
9313 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9314
9315 // Get mappable expression information.
9316 MappableExprsHandler MEHandler(D, CGF);
9317 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9318
9319 auto RI = CS.getCapturedRecordDecl()->field_begin();
9320 auto CV = CapturedVars.begin();
9321 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9322 CE = CS.capture_end();
9323 CI != CE; ++CI, ++RI, ++CV) {
9324 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9325 MappableExprsHandler::MapValuesArrayTy CurPointers;
9326 MappableExprsHandler::MapValuesArrayTy CurSizes;
9327 MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9328 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9329
9330 // VLA sizes are passed to the outlined region by copy and do not have map
9331 // information associated.
9332 if (CI->capturesVariableArrayType()) {
9333 CurBasePointers.push_back(*CV);
9334 CurPointers.push_back(*CV);
9335 CurSizes.push_back(CGF.Builder.CreateIntCast(
9336 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9337 // Copy to the device as an argument. No need to retrieve it.
9338 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9339 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9340 MappableExprsHandler::OMP_MAP_IMPLICIT);
9341 } else {
9342 // If we have any information in the map clause, we use it, otherwise we
9343 // just do a default mapping.
9344 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9345 CurSizes, CurMapTypes, PartialStruct);
9346 if (CurBasePointers.empty())
9347 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9348 CurPointers, CurSizes, CurMapTypes);
9349 // Generate correct mapping for variables captured by reference in
9350 // lambdas.
9351 if (CI->capturesVariable())
9352 MEHandler.generateInfoForLambdaCaptures(
9353 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9354 CurMapTypes, LambdaPointers);
9355 }
9356 // We expect to have at least an element of information for this capture.
9357 assert(!CurBasePointers.empty() &&
9358 "Non-existing map pointer for capture!");
9359 assert(CurBasePointers.size() == CurPointers.size() &&
9360 CurBasePointers.size() == CurSizes.size() &&
9361 CurBasePointers.size() == CurMapTypes.size() &&
9362 "Inconsistent map information sizes!");
9363
9364 // If there is an entry in PartialStruct it means we have a struct with
9365 // individual members mapped. Emit an extra combined entry.
9366 if (PartialStruct.Base.isValid())
9367 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9368 CurMapTypes, PartialStruct);
9369
9370 // We need to append the results of this capture to what we already have.
9371 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9372 Pointers.append(CurPointers.begin(), CurPointers.end());
9373 Sizes.append(CurSizes.begin(), CurSizes.end());
9374 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9375 }
9376 // Adjust MEMBER_OF flags for the lambdas captures.
9377 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9378 Pointers, MapTypes);
9379 // Map other list items in the map clause which are not captured variables
9380 // but "declare target link" global variables.
9381 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9382 MapTypes);
9383
9384 TargetDataInfo Info;
9385 // Fill up the arrays and create the arguments.
9386 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9387 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9388 Info.PointersArray, Info.SizesArray,
9389 Info.MapTypesArray, Info);
9390 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9391 InputInfo.BasePointersArray =
9392 Address(Info.BasePointersArray, CGM.getPointerAlign());
9393 InputInfo.PointersArray =
9394 Address(Info.PointersArray, CGM.getPointerAlign());
9395 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9396 MapTypesArray = Info.MapTypesArray;
9397 if (RequiresOuterTask)
9398 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9399 else
9400 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9401 };
9402
9403 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9404 CodeGenFunction &CGF, PrePostActionTy &) {
9405 if (RequiresOuterTask) {
9406 CodeGenFunction::OMPTargetDataInfo InputInfo;
9407 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9408 } else {
9409 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9410 }
9411 };
9412
9413 // If we have a target function ID it means that we need to support
9414 // offloading; otherwise, just execute on the host. We need to execute on the
9415 // host regardless of the conditional in the if clause if, e.g., the user does
9416 // not specify target triples.
9417 if (OutlinedFnID) {
9418 if (IfCond) {
9419 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9420 } else {
9421 RegionCodeGenTy ThenRCG(TargetThenGen);
9422 ThenRCG(CGF);
9423 }
9424 } else {
9425 RegionCodeGenTy ElseRCG(TargetElseGen);
9426 ElseRCG(CGF);
9427 }
9428 }
9429
9430 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9431 StringRef ParentName) {
9432 if (!S)
9433 return;
9434
9435 // Codegen OMP target directives that offload compute to the device.
9436 bool RequiresDeviceCodegen =
9437 isa<OMPExecutableDirective>(S) &&
9438 isOpenMPTargetExecutionDirective(
9439 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9440
9441 if (RequiresDeviceCodegen) {
9442 const auto &E = *cast<OMPExecutableDirective>(S);
9443 unsigned DeviceID;
9444 unsigned FileID;
9445 unsigned Line;
9446 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9447 FileID, Line);
9448
9449 // Is this a target region that should not be emitted as an entry point? If
9450 // so, just signal that we are done with this target region.
9451 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9452 ParentName, Line))
9453 return;
9454
9455 switch (E.getDirectiveKind()) {
9456 case OMPD_target:
9457 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9458 cast<OMPTargetDirective>(E));
9459 break;
9460 case OMPD_target_parallel:
9461 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9462 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9463 break;
9464 case OMPD_target_teams:
9465 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9466 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9467 break;
9468 case OMPD_target_teams_distribute:
9469 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9470 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9471 break;
9472 case OMPD_target_teams_distribute_simd:
9473 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9474 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9475 break;
9476 case OMPD_target_parallel_for:
9477 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9478 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9479 break;
9480 case OMPD_target_parallel_for_simd:
9481 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9482 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9483 break;
9484 case OMPD_target_simd:
9485 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9486 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9487 break;
9488 case OMPD_target_teams_distribute_parallel_for:
9489 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9490 CGM, ParentName,
9491 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9492 break;
9493 case OMPD_target_teams_distribute_parallel_for_simd:
9494 CodeGenFunction::
9495 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9496 CGM, ParentName,
9497 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9498 break;
9499 case OMPD_parallel:
9500 case OMPD_for:
9501 case OMPD_parallel_for:
9502 case OMPD_parallel_master:
9503 case OMPD_parallel_sections:
9504 case OMPD_for_simd:
9505 case OMPD_parallel_for_simd:
9506 case OMPD_cancel:
9507 case OMPD_cancellation_point:
9508 case OMPD_ordered:
9509 case OMPD_threadprivate:
9510 case OMPD_allocate:
9511 case OMPD_task:
9512 case OMPD_simd:
9513 case OMPD_sections:
9514 case OMPD_section:
9515 case OMPD_single:
9516 case OMPD_master:
9517 case OMPD_critical:
9518 case OMPD_taskyield:
9519 case OMPD_barrier:
9520 case OMPD_taskwait:
9521 case OMPD_taskgroup:
9522 case OMPD_atomic:
9523 case OMPD_flush:
9524 case OMPD_teams:
9525 case OMPD_target_data:
9526 case OMPD_target_exit_data:
9527 case OMPD_target_enter_data:
9528 case OMPD_distribute:
9529 case OMPD_distribute_simd:
9530 case OMPD_distribute_parallel_for:
9531 case OMPD_distribute_parallel_for_simd:
9532 case OMPD_teams_distribute:
9533 case OMPD_teams_distribute_simd:
9534 case OMPD_teams_distribute_parallel_for:
9535 case OMPD_teams_distribute_parallel_for_simd:
9536 case OMPD_target_update:
9537 case OMPD_declare_simd:
9538 case OMPD_declare_variant:
9539 case OMPD_declare_target:
9540 case OMPD_end_declare_target:
9541 case OMPD_declare_reduction:
9542 case OMPD_declare_mapper:
9543 case OMPD_taskloop:
9544 case OMPD_taskloop_simd:
9545 case OMPD_master_taskloop:
9546 case OMPD_master_taskloop_simd:
9547 case OMPD_parallel_master_taskloop:
9548 case OMPD_parallel_master_taskloop_simd:
9549 case OMPD_requires:
9550 case OMPD_unknown:
9551 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9552 }
9553 return;
9554 }
9555
9556 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9557 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9558 return;
9559
9560 scanForTargetRegionsFunctions(
9561 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9562 return;
9563 }
9564
9565 // If this is a lambda function, look into its body.
9566 if (const auto *L = dyn_cast<LambdaExpr>(S))
9567 S = L->getBody();
9568
9569 // Keep looking for target regions recursively.
9570 for (const Stmt *II : S->children())
9571 scanForTargetRegionsFunctions(II, ParentName);
9572 }
9573
9574 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9575 // If emitting code for the host, we do not process FD here. Instead we do
9576 // the normal code generation.
9577 if (!CGM.getLangOpts().OpenMPIsDevice) {
9578 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9579 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9580 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9581 // Do not emit device_type(nohost) functions for the host.
9582 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9583 return true;
9584 }
9585 return false;
9586 }
9587
9588 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9589 // Try to detect target regions in the function.
9590 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9591 StringRef Name = CGM.getMangledName(GD);
9592 scanForTargetRegionsFunctions(FD->getBody(), Name);
9593 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9594 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9595 // Do not emit device_type(host) functions for the device.
9596 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9597 return true;
9598 }
9599
9600 // Do not emit the function if it is not marked as declare target.
9601 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9602 AlreadyEmittedTargetDecls.count(VD) == 0;
9603 }
9604
9605 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9606 if (!CGM.getLangOpts().OpenMPIsDevice)
9607 return false;
9608
9609 // Check if there are Ctors/Dtors in this declaration and look for target
9610 // regions in it. We use the complete variant to produce the kernel name
9611 // mangling.
9612 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9613 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9614 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9615 StringRef ParentName =
9616 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9617 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9618 }
9619 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9620 StringRef ParentName =
9621 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9622 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9623 }
9624 }
9625
9626 // Do not emit the variable if it is not marked as declare target.
9627 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9628 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9629 cast<VarDecl>(GD.getDecl()));
9630 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9631 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9632 HasRequiresUnifiedSharedMemory)) {
9633 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9634 return true;
9635 }
9636 return false;
9637 }
9638
9639 llvm::Constant *
9640 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9641 const VarDecl *VD) {
9642 assert(VD->getType().isConstant(CGM.getContext()) &&
9643 "Expected constant variable.");
9644 StringRef VarName;
9645 llvm::Constant *Addr;
9646 llvm::GlobalValue::LinkageTypes Linkage;
9647 QualType Ty = VD->getType();
9648 SmallString<128> Buffer;
9649 {
9650 unsigned DeviceID;
9651 unsigned FileID;
9652 unsigned Line;
9653 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9654 FileID, Line);
9655 llvm::raw_svector_ostream OS(Buffer);
9656 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9657 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9658 VarName = OS.str();
9659 }
9660 Linkage = llvm::GlobalValue::InternalLinkage;
9661 Addr =
9662 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9663 getDefaultFirstprivateAddressSpace());
9664 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9665 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9666 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9667 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9668 VarName, Addr, VarSize,
9669 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9670 return Addr;
9671 }
9672
9673 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9674 llvm::Constant *Addr) {
9675 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9676 !CGM.getLangOpts().OpenMPIsDevice)
9677 return;
9678 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9679 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9680 if (!Res) {
9681 if (CGM.getLangOpts().OpenMPIsDevice) {
9682 // Register non-target variables being emitted in device code (debug info
9683 // may cause this).
9684 StringRef VarName = CGM.getMangledName(VD);
9685 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9686 }
9687 return;
9688 }
9689 // Register declare target variables.
9690 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9691 StringRef VarName;
9692 CharUnits VarSize;
9693 llvm::GlobalValue::LinkageTypes Linkage;
9694
9695 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9696 !HasRequiresUnifiedSharedMemory) {
9697 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9698 VarName = CGM.getMangledName(VD);
9699 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9700 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9701 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9702 } else {
9703 VarSize = CharUnits::Zero();
9704 }
9705 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9706 // Temp solution to prevent optimizations of the internal variables.
9707 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9708 std::string RefName = getName({VarName, "ref"});
9709 if (!CGM.GetGlobalValue(RefName)) {
9710 llvm::Constant *AddrRef =
9711 getOrCreateInternalVariable(Addr->getType(), RefName);
9712 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9713 GVAddrRef->setConstant(/*Val=*/true);
9714 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9715 GVAddrRef->setInitializer(Addr);
9716 CGM.addCompilerUsedGlobal(GVAddrRef);
9717 }
9718 }
9719 } else {
9720 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9721 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9722 HasRequiresUnifiedSharedMemory)) &&
9723 "Declare target attribute must link or to with unified memory.");
9724 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9725 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9726 else
9727 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9728
9729 if (CGM.getLangOpts().OpenMPIsDevice) {
9730 VarName = Addr->getName();
9731 Addr = nullptr;
9732 } else {
9733 VarName = getAddrOfDeclareTargetVar(VD).getName();
9734 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9735 }
9736 VarSize = CGM.getPointerSize();
9737 Linkage = llvm::GlobalValue::WeakAnyLinkage;
9738 }
9739
9740 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9741 VarName, Addr, VarSize, Flags, Linkage);
9742 }
9743
9744 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9745 if (isa<FunctionDecl>(GD.getDecl()) ||
9746 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9747 return emitTargetFunctions(GD);
9748
9749 return emitTargetGlobalVariable(GD);
9750 }
9751
9752 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9753 for (const VarDecl *VD : DeferredGlobalVariables) {
9754 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9755 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9756 if (!Res)
9757 continue;
9758 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9759 !HasRequiresUnifiedSharedMemory) {
9760 CGM.EmitGlobal(VD);
9761 } else {
9762 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9763 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9764 HasRequiresUnifiedSharedMemory)) &&
9765 "Expected link clause or to clause with unified memory.");
9766 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9767 }
9768 }
9769 }
9770
9771 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9772 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9773 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9774 " Expected target-based directive.");
9775 }
9776
9777 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9778 const OMPRequiresDecl *D) {
9779 for (const OMPClause *Clause : D->clauselists()) {
9780 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9781 HasRequiresUnifiedSharedMemory = true;
9782 break;
9783 }
9784 }
9785 }
9786
9787 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9788 LangAS &AS) {
9789 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9790 return false;
9791 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9792 switch (A->getAllocatorType()) {
9793 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9794 // Not supported, fallback to the default mem space.
9795 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9796 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9797 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9798 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9799 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9800 case OMPAllocateDeclAttr::OMPConstMemAlloc:
9801 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9802 AS = LangAS::Default;
9803 return true;
9804 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9805 llvm_unreachable("Expected predefined allocator for the variables with the "
9806 "static storage.");
9807 }
9808 return false;
9809 }
9810
9811 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9812 return HasRequiresUnifiedSharedMemory;
9813 }
9814
9815 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9816 CodeGenModule &CGM)
9817 : CGM(CGM) {
9818 if (CGM.getLangOpts().OpenMPIsDevice) {
9819 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9820 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9821 }
9822 }
9823
9824 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9825 if (CGM.getLangOpts().OpenMPIsDevice)
9826 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9827 }
9828
9829 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9830 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9831 return true;
9832
9833 const auto *D = cast<FunctionDecl>(GD.getDecl());
9834 // Do not emit the function if it is marked as declare target, as it was
9835 // already emitted.
9836 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9837 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9838 if (auto *F = dyn_cast_or_null<llvm::Function>(
9839 CGM.GetGlobalValue(CGM.getMangledName(GD))))
9840 return !F->isDeclaration();
9841 return false;
9842 }
9843 return true;
9844 }
9845
9846 return !AlreadyEmittedTargetDecls.insert(D).second;
9847 }
9848
9849 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9850 // If we don't have entries or if we are emitting code for the device, we
9851 // don't need to do anything.
9852 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9853 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9854 (OffloadEntriesInfoManager.empty() &&
9855 !HasEmittedDeclareTargetRegion &&
9856 !HasEmittedTargetRegion))
9857 return nullptr;
9858
9859 // Create and register the function that handles the requires directives.
9860 ASTContext &C = CGM.getContext();
9861
9862 llvm::Function *RequiresRegFn;
9863 {
9864 CodeGenFunction CGF(CGM);
9865 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9866 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9867 std::string ReqName = getName({"omp_offloading", "requires_reg"});
9868 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9869 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9870 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9871 // TODO: check for other requires clauses.
9872 // The requires directive takes effect only when a target region is
9873 // present in the compilation unit. Otherwise it is ignored and not
9874 // passed to the runtime. This prevents the runtime from throwing an error
9875 // for mismatched requires clauses across compilation units that don't
9876 // contain at least one target region.
9877 assert((HasEmittedTargetRegion ||
9878 HasEmittedDeclareTargetRegion ||
9879 !OffloadEntriesInfoManager.empty()) &&
9880 "Target or declare target region expected.");
9881 if (HasRequiresUnifiedSharedMemory)
9882 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9883 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9884 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9885 CGF.FinishFunction();
9886 }
9887 return RequiresRegFn;
9888 }
9889
9890 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9891 const OMPExecutableDirective &D,
9892 SourceLocation Loc,
9893 llvm::Function *OutlinedFn,
9894 ArrayRef<llvm::Value *> CapturedVars) {
9895 if (!CGF.HaveInsertPoint())
9896 return;
9897
9898 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9899 CodeGenFunction::RunCleanupsScope Scope(CGF);
9900
9901 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9902 llvm::Value *Args[] = {
9903 RTLoc,
9904 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9905 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9906 llvm::SmallVector<llvm::Value *, 16> RealArgs;
9907 RealArgs.append(std::begin(Args), std::end(Args));
9908 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9909
9910 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9911 CGF.EmitRuntimeCall(RTLFn, RealArgs);
9912 }
9913
9914 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9915 const Expr *NumTeams,
9916 const Expr *ThreadLimit,
9917 SourceLocation Loc) {
9918 if (!CGF.HaveInsertPoint())
9919 return;
9920
9921 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9922
9923 llvm::Value *NumTeamsVal =
9924 NumTeams
9925 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9926 CGF.CGM.Int32Ty, /* isSigned = */ true)
9927 : CGF.Builder.getInt32(0);
9928
9929 llvm::Value *ThreadLimitVal =
9930 ThreadLimit
9931 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9932 CGF.CGM.Int32Ty, /* isSigned = */ true)
9933 : CGF.Builder.getInt32(0);
9934
9935 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9936 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9937 ThreadLimitVal};
9938 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9939 PushNumTeamsArgs);
9940 }
9941
9942 void CGOpenMPRuntime::emitTargetDataCalls(
9943 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9944 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9945 if (!CGF.HaveInsertPoint())
9946 return;
9947
9948 // Action used to replace the default codegen action and turn privatization
9949 // off.
9950 PrePostActionTy NoPrivAction;
9951
9952 // Generate the code for the opening of the data environment. Capture all the
9953 // arguments of the runtime call by reference because they are used in the
9954 // closing of the region.
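// As an illustrative sketch, a construct such as
//   #pragma omp target data map(tofrom: a[0:N])
//   { /* body */ }
// conceptually lowers to
//   __tgt_target_data_begin(device_id, /*arg_num=*/1, baseptrs, ptrs, sizes,
//                           maptypes);
//   /* body */
//   __tgt_target_data_end(device_id, /*arg_num=*/1, baseptrs, ptrs, sizes,
//                         maptypes);
// with the body possibly emitted twice (with and without device pointer
// privatization), as handled below.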
9955 auto &&BeginThenGen = [this, &D, Device, &Info,
9956 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9957 // Fill up the arrays with all the mapped variables.
9958 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9959 MappableExprsHandler::MapValuesArrayTy Pointers;
9960 MappableExprsHandler::MapValuesArrayTy Sizes;
9961 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9962
9963 // Get map clause information.
9964 MappableExprsHandler MCHandler(D, CGF);
9965 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9966
9967 // Fill up the arrays and create the arguments.
9968 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9969
9970 llvm::Value *BasePointersArrayArg = nullptr;
9971 llvm::Value *PointersArrayArg = nullptr;
9972 llvm::Value *SizesArrayArg = nullptr;
9973 llvm::Value *MapTypesArrayArg = nullptr;
9974 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9975 SizesArrayArg, MapTypesArrayArg, Info);
9976
9977 // Emit device ID if any.
9978 llvm::Value *DeviceID = nullptr;
9979 if (Device) {
9980 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9981 CGF.Int64Ty, /*isSigned=*/true);
9982 } else {
9983 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9984 }
9985
9986 // Emit the number of elements in the offloading arrays.
9987 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9988
9989 llvm::Value *OffloadingArgs[] = {
9990 DeviceID, PointerNum, BasePointersArrayArg,
9991 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9992 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
9993 OffloadingArgs);
9994
9995 // If device pointer privatization is required, emit the body of the region
9996 // here. It will have to be duplicated: with and without privatization.
9997 if (!Info.CaptureDeviceAddrMap.empty())
9998 CodeGen(CGF);
9999 };
10000
10001 // Generate code for the closing of the data region.
10002 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10003 PrePostActionTy &) {
10004 assert(Info.isValid() && "Invalid data environment closing arguments.");
10005
10006 llvm::Value *BasePointersArrayArg = nullptr;
10007 llvm::Value *PointersArrayArg = nullptr;
10008 llvm::Value *SizesArrayArg = nullptr;
10009 llvm::Value *MapTypesArrayArg = nullptr;
10010 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10011 SizesArrayArg, MapTypesArrayArg, Info);
10012
10013 // Emit device ID if any.
10014 llvm::Value *DeviceID = nullptr;
10015 if (Device) {
10016 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10017 CGF.Int64Ty, /*isSigned=*/true);
10018 } else {
10019 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10020 }
10021
10022 // Emit the number of elements in the offloading arrays.
10023 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10024
10025 llvm::Value *OffloadingArgs[] = {
10026 DeviceID, PointerNum, BasePointersArrayArg,
10027 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10028 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10029 OffloadingArgs);
10030 };
10031
10032 // If we need device pointer privatization, we need to emit the body of the
10033 // region with no privatization in the 'else' branch of the conditional.
10034 // Otherwise, we don't have to do anything.
10035 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10036 PrePostActionTy &) {
10037 if (!Info.CaptureDeviceAddrMap.empty()) {
10038 CodeGen.setAction(NoPrivAction);
10039 CodeGen(CGF);
10040 }
10041 };
10042
10043 // We don't have to do anything to close the region if the if clause evaluates
10044 // to false.
10045 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10046
10047 if (IfCond) {
10048 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10049 } else {
10050 RegionCodeGenTy RCG(BeginThenGen);
10051 RCG(CGF);
10052 }
10053
10054 // If we don't require privatization of device pointers, we emit the body in
10055 // between the runtime calls. This avoids duplicating the body code.
10056 if (Info.CaptureDeviceAddrMap.empty()) {
10057 CodeGen.setAction(NoPrivAction);
10058 CodeGen(CGF);
10059 }
10060
10061 if (IfCond) {
10062 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10063 } else {
10064 RegionCodeGenTy RCG(EndThenGen);
10065 RCG(CGF);
10066 }
10067 }
10068
10069 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10070 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10071 const Expr *Device) {
10072 if (!CGF.HaveInsertPoint())
10073 return;
10074
10075 assert((isa<OMPTargetEnterDataDirective>(D) ||
10076 isa<OMPTargetExitDataDirective>(D) ||
10077 isa<OMPTargetUpdateDirective>(D)) &&
10078 "Expecting either target enter, exit data, or update directives.");
10079
10080 CodeGenFunction::OMPTargetDataInfo InputInfo;
10081 llvm::Value *MapTypesArray = nullptr;
10082 // Generate the code for the opening of the data environment.
10083 auto &&ThenGen = [this, &D, Device, &InputInfo,
10084 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10085 // Emit device ID if any.
10086 llvm::Value *DeviceID = nullptr;
10087 if (Device) {
10088 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10089 CGF.Int64Ty, /*isSigned=*/true);
10090 } else {
10091 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10092 }
10093
10094 // Emit the number of elements in the offloading arrays.
10095 llvm::Constant *PointerNum =
10096 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10097
10098 llvm::Value *OffloadingArgs[] = {DeviceID,
10099 PointerNum,
10100 InputInfo.BasePointersArray.getPointer(),
10101 InputInfo.PointersArray.getPointer(),
10102 InputInfo.SizesArray.getPointer(),
10103 MapTypesArray};
10104
10105 // Select the right runtime function call for each expected standalone
10106 // directive.
10107 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10108 OpenMPRTLFunction RTLFn;
10109 switch (D.getDirectiveKind()) {
10110 case OMPD_target_enter_data:
10111 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10112 : OMPRTL__tgt_target_data_begin;
10113 break;
10114 case OMPD_target_exit_data:
10115 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10116 : OMPRTL__tgt_target_data_end;
10117 break;
10118 case OMPD_target_update:
10119 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10120 : OMPRTL__tgt_target_data_update;
10121 break;
10122 case OMPD_parallel:
10123 case OMPD_for:
10124 case OMPD_parallel_for:
10125 case OMPD_parallel_master:
10126 case OMPD_parallel_sections:
10127 case OMPD_for_simd:
10128 case OMPD_parallel_for_simd:
10129 case OMPD_cancel:
10130 case OMPD_cancellation_point:
10131 case OMPD_ordered:
10132 case OMPD_threadprivate:
10133 case OMPD_allocate:
10134 case OMPD_task:
10135 case OMPD_simd:
10136 case OMPD_sections:
10137 case OMPD_section:
10138 case OMPD_single:
10139 case OMPD_master:
10140 case OMPD_critical:
10141 case OMPD_taskyield:
10142 case OMPD_barrier:
10143 case OMPD_taskwait:
10144 case OMPD_taskgroup:
10145 case OMPD_atomic:
10146 case OMPD_flush:
10147 case OMPD_teams:
10148 case OMPD_target_data:
10149 case OMPD_distribute:
10150 case OMPD_distribute_simd:
10151 case OMPD_distribute_parallel_for:
10152 case OMPD_distribute_parallel_for_simd:
10153 case OMPD_teams_distribute:
10154 case OMPD_teams_distribute_simd:
10155 case OMPD_teams_distribute_parallel_for:
10156 case OMPD_teams_distribute_parallel_for_simd:
10157 case OMPD_declare_simd:
10158 case OMPD_declare_variant:
10159 case OMPD_declare_target:
10160 case OMPD_end_declare_target:
10161 case OMPD_declare_reduction:
10162 case OMPD_declare_mapper:
10163 case OMPD_taskloop:
10164 case OMPD_taskloop_simd:
10165 case OMPD_master_taskloop:
10166 case OMPD_master_taskloop_simd:
10167 case OMPD_parallel_master_taskloop:
10168 case OMPD_parallel_master_taskloop_simd:
10169 case OMPD_target:
10170 case OMPD_target_simd:
10171 case OMPD_target_teams_distribute:
10172 case OMPD_target_teams_distribute_simd:
10173 case OMPD_target_teams_distribute_parallel_for:
10174 case OMPD_target_teams_distribute_parallel_for_simd:
10175 case OMPD_target_teams:
10176 case OMPD_target_parallel:
10177 case OMPD_target_parallel_for:
10178 case OMPD_target_parallel_for_simd:
10179 case OMPD_requires:
10180 case OMPD_unknown:
10181 llvm_unreachable("Unexpected standalone target data directive.");
10182 break;
10183 }
10184 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10185 };
10186
10187 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10188 CodeGenFunction &CGF, PrePostActionTy &) {
10189 // Fill up the arrays with all the mapped variables.
10190 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10191 MappableExprsHandler::MapValuesArrayTy Pointers;
10192 MappableExprsHandler::MapValuesArrayTy Sizes;
10193 MappableExprsHandler::MapFlagsArrayTy MapTypes;
10194
10195 // Get map clause information.
10196 MappableExprsHandler MEHandler(D, CGF);
10197 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10198
10199 TargetDataInfo Info;
10200 // Fill up the arrays and create the arguments.
10201 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10202 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10203 Info.PointersArray, Info.SizesArray,
10204 Info.MapTypesArray, Info);
10205 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10206 InputInfo.BasePointersArray =
10207 Address(Info.BasePointersArray, CGM.getPointerAlign());
10208 InputInfo.PointersArray =
10209 Address(Info.PointersArray, CGM.getPointerAlign());
10210 InputInfo.SizesArray =
10211 Address(Info.SizesArray, CGM.getPointerAlign());
10212 MapTypesArray = Info.MapTypesArray;
10213 if (D.hasClausesOfKind<OMPDependClause>())
10214 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10215 else
10216 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10217 };
10218
10219 if (IfCond) {
10220 emitIfClause(CGF, IfCond, TargetThenGen,
10221 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10222 } else {
10223 RegionCodeGenTy ThenRCG(TargetThenGen);
10224 ThenRCG(CGF);
10225 }
10226 }
10227
10228 namespace {
10229 /// Kind of parameter in a function with 'declare simd' directive.
10230 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10231 /// Attribute set of the parameter.
10232 struct ParamAttrTy {
10233 ParamKindTy Kind = Vector;
10234 llvm::APSInt StrideOrArg;
10235 llvm::APSInt Alignment;
10236 };
10237 } // namespace
10238
10239 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10240 ArrayRef<ParamAttrTy> ParamAttrs) {
10241 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10242 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10243 // of that clause. The VLEN value must be a power of 2.
10244 // Otherwise, the notion of the function's "characteristic data type" (CDT)
10245 // is used to compute the vector length.
10246 // CDT is defined in the following order:
10247 // a) For non-void function, the CDT is the return type.
10248 // b) If the function has any non-uniform, non-linear parameters, then the
10249 // CDT is the type of the first such parameter.
10250 //     c) If the CDT determined by a) or b) above is a struct, union, or class
10251 //        type which is passed by value (except for the type that maps to the
10252 // built-in complex data type), the characteristic data type is int.
10253 // d) If none of the above three cases is applicable, the CDT is int.
10254 // The VLEN is then determined based on the CDT and the size of vector
10255 // register of that ISA for which current vector version is generated. The
10256 // VLEN is computed using the formula below:
10257 // VLEN = sizeof(vector_register) / sizeof(CDT),
10258 // where the vector register size is specified in section 3.2.1 "Registers and
10259 // the Stack Frame" of the original AMD64 ABI document.
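// Worked example (illustrative): for "float foo(float x)" with no simdlen
// clause, the CDT is float (the return type), so an AVX variant with 256-bit
// vector registers gets VLEN = 256 / 32 = 8.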
10260 QualType RetType = FD->getReturnType();
10261 if (RetType.isNull())
10262 return 0;
10263 ASTContext &C = FD->getASTContext();
10264 QualType CDT;
10265 if (!RetType.isNull() && !RetType->isVoidType()) {
10266 CDT = RetType;
10267 } else {
10268 unsigned Offset = 0;
10269 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10270 if (ParamAttrs[Offset].Kind == Vector)
10271 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10272 ++Offset;
10273 }
10274 if (CDT.isNull()) {
10275 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10276 if (ParamAttrs[I + Offset].Kind == Vector) {
10277 CDT = FD->getParamDecl(I)->getType();
10278 break;
10279 }
10280 }
10281 }
10282 }
10283 if (CDT.isNull())
10284 CDT = C.IntTy;
10285 CDT = CDT->getCanonicalTypeUnqualified();
10286 if (CDT->isRecordType() || CDT->isUnionType())
10287 CDT = C.IntTy;
10288 return C.getTypeSize(CDT);
10289 }
10290
10291 static void
10292 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10293 const llvm::APSInt &VLENVal,
10294 ArrayRef<ParamAttrTy> ParamAttrs,
10295 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10296 struct ISADataTy {
10297 char ISA;
10298 unsigned VecRegSize;
10299 };
10300 ISADataTy ISAData[] = {
10301 {
10302 'b', 128
10303 }, // SSE
10304 {
10305 'c', 256
10306 }, // AVX
10307 {
10308 'd', 256
10309 }, // AVX2
10310 {
10311 'e', 512
10312 }, // AVX512
10313 };
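// As an illustrative example, "double foo(double x)" with simdlen(2) and
// notinbranch yields, for the SSE entry above, the attribute "_ZGVbN2v_foo":
// ISA 'b', unmasked 'N', VLEN 2, one vector parameter 'v', then '_' and the
// function name, exactly as assembled below.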
10314 llvm::SmallVector<char, 2> Masked;
10315 switch (State) {
10316 case OMPDeclareSimdDeclAttr::BS_Undefined:
10317 Masked.push_back('N');
10318 Masked.push_back('M');
10319 break;
10320 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10321 Masked.push_back('N');
10322 break;
10323 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10324 Masked.push_back('M');
10325 break;
10326 }
10327 for (char Mask : Masked) {
10328 for (const ISADataTy &Data : ISAData) {
10329 SmallString<256> Buffer;
10330 llvm::raw_svector_ostream Out(Buffer);
10331 Out << "_ZGV" << Data.ISA << Mask;
10332 if (!VLENVal) {
10333 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10334 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10335 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10336 } else {
10337 Out << VLENVal;
10338 }
10339 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10340 switch (ParamAttr.Kind){
10341 case LinearWithVarStride:
10342 Out << 's' << ParamAttr.StrideOrArg;
10343 break;
10344 case Linear:
10345 Out << 'l';
10346 if (!!ParamAttr.StrideOrArg)
10347 Out << ParamAttr.StrideOrArg;
10348 break;
10349 case Uniform:
10350 Out << 'u';
10351 break;
10352 case Vector:
10353 Out << 'v';
10354 break;
10355 }
10356 if (!!ParamAttr.Alignment)
10357 Out << 'a' << ParamAttr.Alignment;
10358 }
10359 Out << '_' << Fn->getName();
10360 Fn->addFnAttr(Out.str());
10361 }
10362 }
10363 }
10364
10365 // These are the functions that are needed to mangle the names of the
10366 // vector functions generated by the compiler, according to the rules
10367 // defined in the "Vector Function ABI specifications for AArch64",
10368 // available at
10369 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}
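
// Sketch of the classification (hypothetical parameters): a plain "vector"
// parameter such as `double X` maps to vector (MTV == true), while
// parameters marked uniform or linear, and the void type, do not.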

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
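
// Worked example of LS (hypothetical types): for a vector parameter of type
// `double *` the pointee is PBV, so LS is sizeof(double) = 64 bits; for a
// plain `float` LS is 32 bits; anything that is neither PBV nor a pointer to
// a PBV type falls back to the target's pointer size (64 bits on AArch64).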

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
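
// Example of NDS/WDS (hypothetical signature): for
//   double bar(float X, double Y);
// the lane sizes are {64, 32, 64}, so NDS = 32 and WDS = 64. The return type
// is PBV, so OutputBecomesInput stays false.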

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return Out.str();
}
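
// Example (hypothetical attribute set): parameters classified as
// {Vector, Uniform, Linear with step 2, Vector aligned to 16 bytes} mangle
// to "vul2va16" under the switch above; a linear step of 1 is omitted.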

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
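
// For instance (hypothetical call), addAArch64VectorName(2, "N", "_ZGV", 'n',
// "v", "foo", /*OutputBecomesInput=*/false, Fn) attaches the attribute
// "_ZGVnN2v_foo": Advanced SIMD ('n'), unmasked ("N"), 2 lanes, one vector
// parameter ("v"), for a scalar function whose mangled name is "foo".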

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
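
// The two variants per NDS mirror the 64-bit and 128-bit Advanced SIMD
// register views: e.g. NDS == 32 emits a 2-lane name (2 x 32 = 64 bits) and
// a 4-lane name (4 x 32 = 128 bits), per section 3.3.1 of the AAVFABI.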

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
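
// Putting it together (hypothetical declaration): for
//   #pragma omp declare simd
//   double foo(double X);
// with SVE available, the else branch above emits the scalable, masked name
// "_ZGVsMxv_foo"; with NEON it emits "_ZGVnN2v_foo" and "_ZGVnM2v_foo"
// (NDS == 64, so one 2-lane variant per mask), assuming the symbol is "foo".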

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
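
// Sketch of the lowering (hypothetical source): for
//   #pragma omp for ordered(2)
// this initializer materializes a local `kmp_dim dims[2]`, fills each
// dimension's upper bound and stride, calls
// __kmpc_doacross_init(loc, gtid, 2, dims), and registers a cleanup that
// calls __kmpc_doacross_fini(loc, gtid) when the region is left.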

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
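
// Illustration (hypothetical source): inside an ordered(2) loop,
//   #pragma omp ordered depend(sink : i - 1, j)
// stores {i - 1, j} into the counter array and calls __kmpc_doacross_wait,
// while `depend(source)` stores the current {i, j} and calls
// __kmpc_doacross_post.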

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert it to a pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
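
// Worked example of the size rounding above (hypothetical VLA): for
//   int A[N];   // with, say, N == 5 at run time and a 16-byte alignment
// size = 20 bytes, so ((20 + 15) / 16) * 16 == 32 bytes are requested from
// __kmpc_alloc, and the matching __kmpc_free is pushed as an EH cleanup.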

namespace {
using OMPContextSelectorData =
    OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
} // anonymous namespace

/// Checks current context and returns true if it matches the context selector.
template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
          typename... Arguments>
static bool checkContext(const OMPContextSelectorData &Data,
                         Arguments... Params) {
  assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
         "Unknown context selector or context selector set.");
  return false;
}

/// Checks for implementation={vendor(<vendor>)} context selector.
/// \returns true iff <vendor>="llvm", false otherwise.
template <>
bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
    const OMPContextSelectorData &Data) {
  return llvm::all_of(Data.Names,
                      [](StringRef S) { return !S.compare_lower("llvm"); });
}

/// Checks for device={kind(<kind>)} context selector.
/// \returns true if <kind>="host" and compilation is for host.
/// true if <kind>="nohost" and compilation is for device.
/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
/// false otherwise.
template <>
bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
    const OMPContextSelectorData &Data, CodeGenModule &CGM) {
  for (StringRef Name : Data.Names) {
    if (!Name.compare_lower("host")) {
      if (CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    if (!Name.compare_lower("nohost")) {
      if (!CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    switch (CGM.getTriple().getArch()) {
    case llvm::Triple::arm:
    case llvm::Triple::armeb:
    case llvm::Triple::aarch64:
    case llvm::Triple::aarch64_be:
    case llvm::Triple::aarch64_32:
    case llvm::Triple::ppc:
    case llvm::Triple::ppc64:
    case llvm::Triple::ppc64le:
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      if (Name.compare_lower("cpu"))
        return false;
      break;
    case llvm::Triple::amdgcn:
    case llvm::Triple::nvptx:
    case llvm::Triple::nvptx64:
      if (Name.compare_lower("gpu"))
        return false;
      break;
    case llvm::Triple::UnknownArch:
    case llvm::Triple::arc:
    case llvm::Triple::avr:
    case llvm::Triple::bpfel:
    case llvm::Triple::bpfeb:
    case llvm::Triple::hexagon:
    case llvm::Triple::mips:
    case llvm::Triple::mipsel:
    case llvm::Triple::mips64:
    case llvm::Triple::mips64el:
    case llvm::Triple::msp430:
    case llvm::Triple::r600:
    case llvm::Triple::riscv32:
    case llvm::Triple::riscv64:
    case llvm::Triple::sparc:
    case llvm::Triple::sparcv9:
    case llvm::Triple::sparcel:
    case llvm::Triple::systemz:
    case llvm::Triple::tce:
    case llvm::Triple::tcele:
    case llvm::Triple::thumb:
    case llvm::Triple::thumbeb:
    case llvm::Triple::xcore:
    case llvm::Triple::le32:
    case llvm::Triple::le64:
    case llvm::Triple::amdil:
    case llvm::Triple::amdil64:
    case llvm::Triple::hsail:
    case llvm::Triple::hsail64:
    case llvm::Triple::spir:
    case llvm::Triple::spir64:
    case llvm::Triple::kalimba:
    case llvm::Triple::shave:
    case llvm::Triple::lanai:
    case llvm::Triple::wasm32:
    case llvm::Triple::wasm64:
    case llvm::Triple::renderscript32:
    case llvm::Triple::renderscript64:
    case llvm::Triple::ve:
      return false;
    }
  }
  return true;
}

static bool matchesContext(CodeGenModule &CGM,
                           const CompleteOMPContextSelectorData &ContextData) {
  for (const OMPContextSelectorData &Data : ContextData) {
    switch (Data.Ctx) {
    case OMP_CTX_vendor:
      assert(Data.CtxSet == OMP_CTX_SET_implementation &&
             "Expected implementation context selector set.");
      if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
        return false;
      break;
    case OMP_CTX_kind:
      assert(Data.CtxSet == OMP_CTX_SET_device &&
             "Expected device context selector set.");
      if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
              Data, CGM))
        return false;
      break;
    case OMP_CTX_unknown:
      llvm_unreachable("Unknown context selector kind.");
    }
  }
  return true;
}

static CompleteOMPContextSelectorData
translateAttrToContextSelectorData(ASTContext &C,
                                   const OMPDeclareVariantAttr *A) {
  CompleteOMPContextSelectorData Data;
  for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
    Data.emplace_back();
    auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
        *std::next(A->ctxSelectorSets_begin(), I));
    auto Ctx = static_cast<OpenMPContextSelectorKind>(
        *std::next(A->ctxSelectors_begin(), I));
    Data.back().CtxSet = CtxSet;
    Data.back().Ctx = Ctx;
    const Expr *Score = *std::next(A->scores_begin(), I);
    Data.back().Score = Score->EvaluateKnownConstInt(C);
    switch (Ctx) {
    case OMP_CTX_vendor:
      assert(CtxSet == OMP_CTX_SET_implementation &&
             "Expected implementation context selector set.");
      Data.back().Names =
          llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
      break;
    case OMP_CTX_kind:
      assert(CtxSet == OMP_CTX_SET_device &&
             "Expected device context selector set.");
      Data.back().Names =
          llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
      break;
    case OMP_CTX_unknown:
      llvm_unreachable("Unknown context selector kind.");
    }
  }
  return Data;
}

static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
                           const CompleteOMPContextSelectorData &RHS) {
  llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
  for (const OMPContextSelectorData &D : RHS) {
    auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
    Pair.getSecond().insert(D.Names.begin(), D.Names.end());
  }
  bool AllSetsAreEqual = true;
  for (const OMPContextSelectorData &D : LHS) {
    auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
    if (It == RHSData.end())
      return false;
    if (D.Names.size() > It->getSecond().size())
      return false;
    if (llvm::set_union(It->getSecond(), D.Names))
      return false;
    AllSetsAreEqual =
        AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
  }

  return LHS.size() != RHS.size() || !AllSetsAreEqual;
}
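
// Example (hypothetical selectors): LHS = {device={kind(cpu)}} is a strict
// subset of RHS = {device={kind(cpu)}, implementation={vendor(llvm)}}: every
// LHS selector occurs in RHS with the same names, and the sizes differ, so
// the function returns true; swapping LHS and RHS makes it return false.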

static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
                            const CompleteOMPContextSelectorData &RHS) {
  // Score is calculated as sum of all scores + 1.
  llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
  bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
  if (RHSIsSubsetOfLHS) {
    LHSScore = llvm::APSInt::get(0);
  } else {
    for (const OMPContextSelectorData &Data : LHS) {
      if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
        LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
      } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
        LHSScore += Data.Score.extend(LHSScore.getBitWidth());
      } else {
        LHSScore += Data.Score;
      }
    }
  }
  llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
  if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
    RHSScore = llvm::APSInt::get(0);
  } else {
    for (const OMPContextSelectorData &Data : RHS) {
      if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
        RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
      } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
        RHSScore += Data.Score.extend(RHSScore.getBitWidth());
      } else {
        RHSScore += Data.Score;
      }
    }
  }
  return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
}

/// Finds the variant function that matches current context with its context
/// selector.
static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
                                                     const FunctionDecl *FD) {
  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
    return FD;
  // Iterate through all DeclareVariant attributes and check context selectors.
  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
  CompleteOMPContextSelectorData TopMostData;
  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
    CompleteOMPContextSelectorData Data =
        translateAttrToContextSelectorData(CGM.getContext(), A);
    if (!matchesContext(CGM, Data))
      continue;
    // If the attribute matches the context, keep the attribute with the
    // highest score.
    if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
      TopMostAttr = A;
      TopMostData.swap(Data);
    }
  }
  if (!TopMostAttr)
    return FD;
  return cast<FunctionDecl>(
      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
          ->getDecl());
}

bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // If the original function is defined already, use its definition.
  StringRef MangledName = CGM.getMangledName(GD);
  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
  if (Orig && !Orig->isDeclaration())
    return false;
  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
  // Emit the original function if it does not have a declare variant attribute
  // or the context does not match.
  if (NewFD == D)
    return false;
  GlobalDecl NewGD = GD.getWithDecl(NewFD);
  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
    DeferredVariantFunction.erase(D);
    return true;
  }
  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
  return true;
}
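
// Usage sketch (hypothetical source): given
//   int special();
//   #pragma omp declare variant(special) match(implementation = {vendor(llvm)})
//   int base();
// the machinery above redirects emission of `base` to `special` when the
// vendor selector matches; if no attribute matches, `base` is emitted as-is.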

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                              [](const OMPLastprivateClause *C) {
                                return C->getKind() ==
                                       OMPC_LASTPRIVATE_conditional;
                              })) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqeName.try_emplace(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          generateUniqueName(CGM, "pl_cond", Ref));
    }
  }
  Data.IVLVal = IVLVal;
  // In simd-only mode, or for simd directives, there is no need to generate
  // threadprivate references for the loop iteration counter; we can use the
  // original one, since outlining cannot happen in simd regions.
  if (CGF.getLangOpts().OpenMPSimd ||
      isOpenMPSimdDirective(S.getDirectiveKind())) {
    Data.UseOriginalIV = true;
    return;
  }
  llvm::SmallString<16> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  PresumedLoc PLoc =
      CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();
  OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
     << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
  Data.IVName = OS.str();
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
}

void CGOpenMPRuntime::initLastprivateConditionalCounter(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  if (CGM.getLangOpts().OpenMPSimd ||
      !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                    [](const OMPLastprivateClause *C) {
                      return C->getKind() == OMPC_LASTPRIVATE_conditional;
                    }))
    return;
  const CGOpenMPRuntime::LastprivateConditionalData &Data =
      LastprivateConditionalStack.back();
  if (Data.UseOriginalIV)
    return;
  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv = iv;
  Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
      CGF, Data.IVLVal.getType(), Data.IVName);
  LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
  CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  CodeGenFunction &CGF;
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  SourceLocation Loc;
  bool UseOriginalIV = false;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqeName.find(E->getDecl());
      if (It == D.DeclToUniqeName.end())
        continue;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->getSecond();
      IVLVal = D.IVLVal;
      IVName = D.IVName;
      UseOriginalIV = D.UseOriginalIV;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CGF.IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqeName.find(E->getMemberDecl());
      if (It == D.DeclToUniqeName.end())
        continue;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->getSecond();
      IVLVal = D.IVLVal;
      IVName = D.IVName;
      UseOriginalIV = D.UseOriginalIV;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      CodeGenFunction &CGF,
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : CGF(CGF), LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
                           UseOriginalIV);
  }
};
} // namespace

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  bool UseOriginalIV;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
      Checker.getFoundData();

  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv
  if (!UseOriginalIV) {
    Address IVAddr =
        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
  }
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal =
        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
    // Check if the variable was updated from a later iteration
    // (last_iv <= global_iv) and, if so, store the new value in the
    // global variable.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = global_iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal =
          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
  }
}
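
// Shape of the construct this lowers (hypothetical source):
//   #pragma omp parallel for lastprivate(conditional : a)
//   for (int i = 0; i < n; ++i)
//     if (f(i)) a = i;
// Each store to `a` funnels through the critical section above, so the
// assignment made in the highest iteration that actually stores wins.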

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->getSecond();
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11605
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)11606 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11607 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11608 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11609 llvm_unreachable("Not supported in SIMD-only mode");
11610 }
11611
emitTeamsOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)11612 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11613 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11614 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11615 llvm_unreachable("Not supported in SIMD-only mode");
11616 }
11617
emitTaskOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,const VarDecl * PartIDVar,const VarDecl * TaskTVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen,bool Tied,unsigned & NumberOfParts)11618 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11619 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11620 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11621 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11622 bool Tied, unsigned &NumberOfParts) {
11623 llvm_unreachable("Not supported in SIMD-only mode");
11624 }
11625
emitParallelCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars,const Expr * IfCond)11626 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11627 SourceLocation Loc,
11628 llvm::Function *OutlinedFn,
11629 ArrayRef<llvm::Value *> CapturedVars,
11630 const Expr *IfCond) {
11631 llvm_unreachable("Not supported in SIMD-only mode");
11632 }
11633
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)11634 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11635 CodeGenFunction &CGF, StringRef CriticalName,
11636 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11637 const Expr *Hint) {
11638 llvm_unreachable("Not supported in SIMD-only mode");
11639 }
11640
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)11641 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11642 const RegionCodeGenTy &MasterOpGen,
11643 SourceLocation Loc) {
11644 llvm_unreachable("Not supported in SIMD-only mode");
11645 }
11646
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)11647 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11648 SourceLocation Loc) {
11649 llvm_unreachable("Not supported in SIMD-only mode");
11650 }
11651
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

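// In SIMD-only mode there is a single implicit thread, so only "simple"
// (serial) reductions can reach this point; the assert below enforces that,
// and the work is delegated to the base-class implementation.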
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

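// Nothing is offloaded in SIMD-only mode, so no global requires special
// target handling here; returning false lets the caller emit the global
// through the regular (non-OpenMP) code path.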
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
