1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45
46 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a task-switching point; a no-op here, overridden by
  /// task regions that support untied tasks.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns true if the region was created with HasCancel set, i.e. it may
  /// contain an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
108
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function, supplied by the caller.
  StringRef HelperName;
};
141
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that builds and extends the dispatch switch used to resume an
  /// untied task at the correct part after it has been rescheduled.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the current part id and switch on
        // it; unknown values fall through to an immediate return.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 resumes at the beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the next part id, run the caller-provided codegen, return to
        // the scheduler, and register a fresh switch case where execution
        // resumes when the task is re-entered.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of resume points registered so far (switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
230
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing outlined region, if
/// there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this inlined region
  /// was entered; restored by InlinedOpenMPRegionRAII on exit.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
313
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name of the target region.
  StringRef HelperName;
};
342
/// Placeholder codegen callback for regions that must never emit a body
/// (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already usable as-is.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
405
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on construction and restores the previous
/// captured-statement/lambda/block state on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash the lambda/block capture state so the inlined region starts with
    // a clean slate; restored in the destructor.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
442
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (intentionally the same bit as
  /// OMP_IDENT_BARRIER_IMPL, matching kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
497
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region. Runs the action's Exit hook when the cleanup fires.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the builder has no insertion point (dead code path).
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
584
585 } // anonymous namespace
586
/// Run the stored codegen callback inside a cleanups scope. If a
/// PrePostActionTy was attached, its Exit hook is pushed as a cleanup
/// *before* the callback runs so it fires on both normal and EH paths.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No caller-provided action: use a default no-op one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
597
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
getReductionInit(const Expr * ReductionOp)601 getReductionInit(const Expr *ReductionOp) {
602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604 if (const auto *DRE =
605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607 return DRD;
608 return nullptr;
609 }
610
/// Emit initialization of the \p Private copy for a reduction item: either
/// invoke the user-defined reduction initializer from \p DRD (with 'omp_priv'
/// bound to \p Private and 'omp_orig' bound to \p Original), or fall back to
/// a zero-initialized constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an explicit initializer: emit InitOp with its two
    // arguments privatized to the private/original addresses and its callee
    // mapped to the generated initializer function.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: materialize a private-linkage null constant and
    // copy it into the private copy.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant with the evaluation strategy matching Ty's kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
662
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    // The source array is only needed when a UDR initializer reads omp_orig.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
751
emitSharedLValue(CodeGenFunction & CGF,const Expr * E)752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753 return CGF.EmitOMPSharedLValue(E);
754 }
755
emitSharedLValueUB(CodeGenFunction & CGF,const Expr * E)756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757 const Expr *E) {
758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760 return LValue();
761 }
762
/// Emit initialization of the private (aggregate/array) copy of reduction
/// item \p N at \p PrivateAddr, using the UDR initializer when \p DRD
/// provides one (or when the private VarDecl has no init of its own).
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction init when one exists, or when the private
  // variable has no initializer to fall back on.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
779
ReductionCodeGen(ArrayRef<const Expr * > Shareds,ArrayRef<const Expr * > Origs,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > ReductionOps)780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781 ArrayRef<const Expr *> Origs,
782 ArrayRef<const Expr *> Privates,
783 ArrayRef<const Expr *> ReductionOps) {
784 ClausesData.reserve(Shareds.size());
785 SharedAddresses.reserve(Shareds.size());
786 Sizes.reserve(Shareds.size());
787 BaseDecls.reserve(Shareds.size());
788 const auto *IOrig = Origs.begin();
789 const auto *IPriv = Privates.begin();
790 const auto *IRed = ReductionOps.begin();
791 for (const Expr *Ref : Shareds) {
792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793 std::advance(IOrig, 1);
794 std::advance(IPriv, 1);
795 std::advance(IRed, 1);
796 }
797 }
798
emitSharedOrigLValue(CodeGenFunction & CGF,unsigned N)799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801 "Number of generated lvalues must be exactly N.");
802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804 SharedAddresses.emplace_back(First, Second);
805 if (ClausesData[N].Shared == ClausesData[N].Ref) {
806 OrigAddresses.emplace_back(First, Second);
807 } else {
808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810 OrigAddresses.emplace_back(First, Second);
811 }
812 }
813
/// Compute and record (in Sizes) the byte size and, for variably modified
/// items, the element count of reduction item N. For VLA-typed privates the
/// computed element count is also bound to the VLA size expression so the
/// variably modified type can be emitted in the current function.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the byte size is known statically and no element
    // count is required (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: elements = (end - begin) + 1; bytes = elements * sizeof.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably modified object: take its byte size and derive the
    // element count from it.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably modified type is emitted below.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
850
/// Overload used when the element count (Size) for a variably modified
/// reduction item is already available: re-binds the VLA size expression to
/// Size and re-emits the type so its dimensions are usable in the current
/// function. For constant-sized items Size must be null and nothing is done.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to the provided element count while the
  // variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
869
/// Emit initialization of the private copy of reduction item N (at
/// PrivateAddr) from the shared copy SharedLVal. DefaultInit gives the
/// caller a chance to run its own initialization first; its return value
/// indicates whether that default handling sufficed.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both private and shared addresses to their memory representations
  // before emitting any initialization code.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with an applicable 'declare reduction' initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903
needCleanups(unsigned N)904 bool ReductionCodeGen::needCleanups(unsigned N) {
905 const auto *PrivateVD =
906 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907 QualType PrivateType = PrivateVD->getType();
908 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909 return DTorKind != QualType::DK_none;
910 }
911
emitCleanups(CodeGenFunction & CGF,unsigned N,Address PrivateAddr)912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913 Address PrivateAddr) {
914 const auto *PrivateVD =
915 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916 QualType PrivateType = PrivateVD->getType();
917 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918 if (needCleanups(N)) {
919 PrivateAddr = CGF.Builder.CreateElementBitCast(
920 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922 }
923 }
924
/// Starting from BaseLV (of type BaseTy), load through each level of
/// pointer/reference indirection until the pointee type matches ElTy, then
/// return an lvalue for the result with its address cast to ElTy's memory
/// representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load one level of indirection: pointers and references need different
    // load helpers, but both yield the pointee lvalue.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
944
/// Make Addr (pointing at an ElTy object) accessible through a base of type
/// BaseTy: for every level of pointer/reference indirection in BaseTy a
/// memory temporary is created and chained into the previous one, and the
/// innermost temporary receives Addr (cast appropriately). Returns the
/// outermost temporary, or Addr itself when no indirection was needed.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; link it into its parent (if any)
    // and remember the outermost one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
972
getBaseDecl(const Expr * Ref,const DeclRefExpr * & DE)973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974 const VarDecl *OrigVD = nullptr;
975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978 Base = TempOASE->getBase()->IgnoreParenImpCasts();
979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980 Base = TempASE->getBase()->IgnoreParenImpCasts();
981 DE = cast<DeclRefExpr>(Base);
982 OrigVD = cast<VarDecl>(DE->getDecl());
983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986 Base = TempASE->getBase()->IgnoreParenImpCasts();
987 DE = cast<DeclRefExpr>(Base);
988 OrigVD = cast<VarDecl>(DE->getDecl());
989 }
990 return OrigVD;
991 }
992
/// If reduction item N is based on an array section/subscript, adjust
/// PrivateAddr so it points at the same offset inside the private copy as
/// the section start has inside the shared base; otherwise return
/// PrivateAddr unchanged. The base declaration is recorded in BaseDecls in
/// both cases.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Element offset of the base from the section start in the shared copy.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    // Apply the same offset within the private copy and wrap it back up so
    // it can be accessed through the original base type.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1018
usesReductionInitializer(unsigned N) const1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020 const OMPDeclareReductionDecl *DRD =
1021 getReductionInit(ClausesData[N].ReductionOp);
1022 return DRD && DRD->getInitializer();
1023 }
1024
/// Load the thread id lvalue in outlined regions where the thread id is
/// passed in as a pointer (kmp_int32 *) argument.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1030
/// Emit the region's code (via the stored CodeGen callback) inside a
/// terminate scope so exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1043
/// In task outlined regions the thread id arrives by value (kmp_int32), so
/// its lvalue is the local variable itself — no pointer load is needed.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1050
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052 QualType FieldTy) {
1053 auto *Field = FieldDecl::Create(
1054 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057 Field->setAccess(AS_public);
1058 DC->addDecl(Field);
1059 return Field;
1060 }
1061
/// Construct the OpenMP runtime codegen support: record the separators used
/// when mangling runtime helper names, create the critical-name type
/// ([8 x i32]), initialize the OpenMPIRBuilder, and load any offload entry
/// metadata from the host IR.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1072
clear()1073 void CGOpenMPRuntime::clear() {
1074 InternalVars.clear();
1075 // Clean non-target variable declarations possibly used only in debug info.
1076 for (const auto &Data : EmittedNonTargetVariables) {
1077 if (!Data.getValue().pointsToAliveValue())
1078 continue;
1079 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080 if (!GV)
1081 continue;
1082 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083 continue;
1084 GV->eraseFromParent();
1085 }
1086 }
1087
getName(ArrayRef<StringRef> Parts) const1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089 SmallString<128> Buffer;
1090 llvm::raw_svector_ostream OS(Buffer);
1091 StringRef Sep = FirstSeparator;
1092 for (StringRef Part : Parts) {
1093 OS << Sep << Part;
1094 Sep = Separator;
1095 }
1096 return std::string(OS.str());
1097 }
1098
/// Emit the '.omp_combiner.' or '.omp_initializer.' helper for a 'declare
/// reduction' construct: an internal function void(Ty *restrict out,
/// Ty *restrict in). The construct's In/Out variables are privatized onto
/// the dereferenced parameters, Out's initializer is emitted for
/// initializers with direct init, and CombinerInitializer (if non-null) is
/// emitted as the body expression.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers with a non-trivial direct initializer on the 'priv'
  // variable, emit it into the out parameter first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155
/// Emit (once per declaration) the combiner and, if present, the initializer
/// functions for a 'declare reduction' construct and cache them in UDRMap.
/// When called inside a function, the declaration is also tracked per
/// function so its cache entries can be dropped when the function finishes.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression through; direct
    // init is handled via the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1181
1182 std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl * D)1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184 auto I = UDRMap.find(D);
1185 if (I != UDRMap.end())
1186 return I->second;
1187 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188 return UDRMap.lookup(D);
1189 }
1190
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Push a finalization callback (for 'parallel' cancellation handling)
  /// onto the OpenMPIRBuilder's stack; popped again in the destructor.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Non-owning; null when no OpenMPIRBuilder is in use.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1236
emitParallelOrTeamsOutlinedFunction(CodeGenModule & CGM,const OMPExecutableDirective & D,const CapturedStmt * CS,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const StringRef OutlinedHelperName,const RegionCodeGenTy & CodeGen)1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241 assert(ThreadIDVar->getType()->isPointerType() &&
1242 "thread id variable must be of type kmp_int32 *");
1243 CodeGenFunction CGF(CGM, true);
1244 bool HasCancel = false;
1245 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246 HasCancel = OPD->hasCancel();
1247 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248 HasCancel = OPD->hasCancel();
1249 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250 HasCancel = OPSD->hasCancel();
1251 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252 HasCancel = OPFD->hasCancel();
1253 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254 HasCancel = OPFD->hasCancel();
1255 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256 HasCancel = OPFD->hasCancel();
1257 else if (const auto *OPFD =
1258 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259 HasCancel = OPFD->hasCancel();
1260 else if (const auto *OPFD =
1261 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262 HasCancel = OPFD->hasCancel();
1263
1264 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265 // parallel region to make cancellation barriers work properly.
1266 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269 HasCancel, OutlinedHelperName);
1270 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273
/// Emit the outlined function for a 'parallel' region using the directive's
/// captured statement for OMPD_parallel.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1281
/// Emit the outlined function for a 'teams' region using the directive's
/// captured statement for OMPD_teams.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1289
/// Emit the outlined function for a 'task' or taskloop region. For untied
/// tasks an action is installed whose codegen calls __kmpc_omp_task to
/// re-enqueue the task for its next part; NumberOfParts reports how many
/// parts the untied task was split into.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen callback used for untied tasks: schedule the next task part via
  // __kmpc_omp_task(loc, tid, task_t).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336
buildStructValue(ConstantStructBuilder & Fields,CodeGenModule & CGM,const RecordDecl * RD,const CGRecordLayout & RL,ArrayRef<llvm::Constant * > Data)1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338 const RecordDecl *RD, const CGRecordLayout &RL,
1339 ArrayRef<llvm::Constant *> Data) {
1340 llvm::StructType *StructTy = RL.getLLVMType();
1341 unsigned PrevIdx = 0;
1342 ConstantInitBuilder CIBuilder(CGM);
1343 auto DI = Data.begin();
1344 for (const FieldDecl *FD : RD->fields()) {
1345 unsigned Idx = RL.getLLVMFieldNo(FD);
1346 // Fill the alignment.
1347 for (unsigned I = PrevIdx; I < Idx; ++I)
1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349 PrevIdx = Idx + 1;
1350 Fields.add(*DI);
1351 ++DI;
1352 }
1353 }
1354
/// Create a global variable of record type Ty whose initializer is built
/// field-by-field from Data (padding slots are zero-filled); the remaining
/// Args are forwarded to ConstantStructBuilder::finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1369
/// Build a constant struct of record type Ty from Data (padding slots
/// zero-filled) and append it to the enclosing aggregate builder Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1381
/// Create the per-function "service" insert point used for lazily emitted
/// runtime calls (e.g. __kmpc_global_thread_num in getThreadID): a no-op
/// bitcast instruction placed either at the current insertion point or right
/// after the function's alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A trivial i32->i32 bitcast of undef serves as a stable placeholder
  // instruction to insert before.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1397
clearLocThreadIdInsertPt(CodeGenFunction & CGF)1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400 if (Elem.second.ServiceInsertPt) {
1401 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402 Elem.second.ServiceInsertPt = nullptr;
1403 Ptr->eraseFromParent();
1404 }
1405 }
1406
emitUpdateLocation(CodeGenFunction & CGF,SourceLocation Loc,unsigned Flags)1407 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1408 SourceLocation Loc,
1409 unsigned Flags) {
1410 llvm::Constant *SrcLocStr;
1411 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1412 Loc.isInvalid()) {
1413 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1414 } else {
1415 std::string FunctionName = "";
1416 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1417 FunctionName = FD->getQualifiedNameAsString();
1418 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1419 const char *FileName = PLoc.getFilename();
1420 unsigned Line = PLoc.getLine();
1421 unsigned Column = PLoc.getColumn();
1422 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1423 Line, Column);
1424 }
1425 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1426 return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1427 Reserved2Flags);
1428 }
1429
/// Return the OpenMP global thread id for the current function: either the
/// cached value, a load of the outlined region's thread-id parameter, or a
/// call to __kmpc_global_thread_num emitted at the service insert point and
/// cached for the rest of the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only use the parameter when either exceptions cannot interfere or
      // the pointer is known to be valid at the current emission point
      // (defined in the entry block or the current block).
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1487
functionFinished(CodeGenFunction & CGF)1488 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1489 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1490 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1491 clearLocThreadIdInsertPt(CGF);
1492 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1493 }
1494 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1495 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1496 UDRMap.erase(D);
1497 FunctionUDRMap.erase(CGF.CurFn);
1498 }
1499 auto I = FunctionUDMMap.find(CGF.CurFn);
1500 if (I != FunctionUDMMap.end()) {
1501 for(const auto *D : I->second)
1502 UDMMap.erase(D);
1503 FunctionUDMMap.erase(I);
1504 }
1505 LastprivateConditionalToTypes.erase(CGF.CurFn);
1506 }
1507
// Return the ident_t* type as cached by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1511
getKmpc_MicroPointerTy()1512 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1513 if (!Kmpc_MicroTy) {
1514 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1515 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1516 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1517 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1518 }
1519 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1520 }
1521
1522 llvm::FunctionCallee
createForStaticInitFunction(unsigned IVSize,bool IVSigned)1523 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1524 assert((IVSize == 32 || IVSize == 64) &&
1525 "IV size is not compatible with the omp runtime");
1526 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1527 : "__kmpc_for_static_init_4u")
1528 : (IVSigned ? "__kmpc_for_static_init_8"
1529 : "__kmpc_for_static_init_8u");
1530 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1531 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1532 llvm::Type *TypeParams[] = {
1533 getIdentTyPointerTy(), // loc
1534 CGM.Int32Ty, // tid
1535 CGM.Int32Ty, // schedtype
1536 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1537 PtrTy, // p_lower
1538 PtrTy, // p_upper
1539 PtrTy, // p_stride
1540 ITy, // incr
1541 ITy // chunk
1542 };
1543 auto *FnTy =
1544 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1545 return CGM.CreateRuntimeFunction(FnTy, Name);
1546 }
1547
1548 llvm::FunctionCallee
createDispatchInitFunction(unsigned IVSize,bool IVSigned)1549 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1550 assert((IVSize == 32 || IVSize == 64) &&
1551 "IV size is not compatible with the omp runtime");
1552 StringRef Name =
1553 IVSize == 32
1554 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1555 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1556 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1557 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1558 CGM.Int32Ty, // tid
1559 CGM.Int32Ty, // schedtype
1560 ITy, // lower
1561 ITy, // upper
1562 ITy, // stride
1563 ITy // chunk
1564 };
1565 auto *FnTy =
1566 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1567 return CGM.CreateRuntimeFunction(FnTy, Name);
1568 }
1569
1570 llvm::FunctionCallee
createDispatchFiniFunction(unsigned IVSize,bool IVSigned)1571 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1572 assert((IVSize == 32 || IVSize == 64) &&
1573 "IV size is not compatible with the omp runtime");
1574 StringRef Name =
1575 IVSize == 32
1576 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1577 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1578 llvm::Type *TypeParams[] = {
1579 getIdentTyPointerTy(), // loc
1580 CGM.Int32Ty, // tid
1581 };
1582 auto *FnTy =
1583 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1584 return CGM.CreateRuntimeFunction(FnTy, Name);
1585 }
1586
1587 llvm::FunctionCallee
createDispatchNextFunction(unsigned IVSize,bool IVSigned)1588 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1589 assert((IVSize == 32 || IVSize == 64) &&
1590 "IV size is not compatible with the omp runtime");
1591 StringRef Name =
1592 IVSize == 32
1593 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1594 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1595 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1596 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1597 llvm::Type *TypeParams[] = {
1598 getIdentTyPointerTy(), // loc
1599 CGM.Int32Ty, // tid
1600 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1601 PtrTy, // p_lower
1602 PtrTy, // p_upper
1603 PtrTy // p_stride
1604 };
1605 auto *FnTy =
1606 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1607 return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609
1610 /// Obtain information that uniquely identifies a target entry. This
1611 /// consists of the file and device IDs as well as line number associated with
1612 /// the relevant entry source location.
getTargetEntryUniqueInfo(ASTContext & C,SourceLocation Loc,unsigned & DeviceID,unsigned & FileID,unsigned & LineNum)1613 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1614 unsigned &DeviceID, unsigned &FileID,
1615 unsigned &LineNum) {
1616 SourceManager &SM = C.getSourceManager();
1617
1618 // The loc should be always valid and have a file ID (the user cannot use
1619 // #pragma directives in macros)
1620
1621 assert(Loc.isValid() && "Source location is expected to be always valid.");
1622
1623 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1624 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1625
1626 llvm::sys::fs::UniqueID ID;
1627 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1628 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1629 << PLoc.getFilename() << EC.message();
1630
1631 DeviceID = ID.getDevice();
1632 FileID = ID.getFile();
1633 LineNum = PLoc.getLine();
1634 }
1635
// Return the address of the "_decl_tgt_ref_ptr" indirection pointer for a
// declare-target variable, creating it on first use. Only 'link' variables
// (and 'to' variables under unified shared memory) are accessed through this
// pointer; everything else gets Address::invalid().
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // -fopenmp-simd generates no device code, so no reference pointer exists.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Internal-linkage variables get a file-unique hex suffix so pointers
      // for same-named variables in different TUs do not collide.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use in this module: create the pointer variable.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, point it at the variable itself; on the device it stays
      // zero-initialized here (filled in elsewhere).
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1674
1675 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1676 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1677 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1678 !CGM.getContext().getTargetInfo().isTLSSupported());
1679 // Lookup the entry, lazily creating it if necessary.
1680 std::string Suffix = getName({"cache", ""});
1681 return getOrCreateInternalVariable(
1682 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1683 }
1684
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1685 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1686 const VarDecl *VD,
1687 Address VDAddr,
1688 SourceLocation Loc) {
1689 if (CGM.getLangOpts().OpenMPUseTLS &&
1690 CGM.getContext().getTargetInfo().isTLSSupported())
1691 return VDAddr;
1692
1693 llvm::Type *VarTy = VDAddr.getElementType();
1694 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1695 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1696 CGM.Int8PtrTy),
1697 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1698 getOrCreateThreadPrivateCache(VD)};
1699 return Address(CGF.EmitRuntimeCall(
1700 OMPBuilder.getOrCreateRuntimeFunction(
1701 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1702 Args),
1703 VDAddr.getAlignment());
1704 }
1705
emitThreadPrivateVarInit(CodeGenFunction & CGF,Address VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)1706 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1707 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1708 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1709 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1710 // library.
1711 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1712 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1713 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1714 OMPLoc);
1715 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1716 // to register constructor/destructor for variable.
1717 llvm::Value *Args[] = {
1718 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1719 Ctor, CopyCtor, Dtor};
1720 CGF.EmitRuntimeCall(
1721 OMPBuilder.getOrCreateRuntimeFunction(
1722 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1723 Args);
1724 }
1725
// Emit the ctor/dtor helper functions for a threadprivate variable definition
// and register them with the runtime. Returns the synthesized
// "__omp_threadprivate_init_" function when called outside any function
// (CGF == nullptr), otherwise emits the registration inline and returns null.
//
// \param VD          The threadprivate variable (its definition is used).
// \param VDAddr      Address of the original (master) copy.
// \param Loc         Location used for the helper functions' debug info.
// \param PerformInit Whether the declaration's initializer must be re-run
//                    for each thread's copy.
// \param CGF         Function to emit the registration into, or null to
//                    synthesize a global init function.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With real TLS the variable needs no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit helpers only once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature:
      //   void *__kmpc_global_ctor_(void *dst) — returns dst.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Re-run the variable's initializer on the thread's copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the incoming pointer unchanged.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Signature: void __kmpc_global_dtor_(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a global initializer that performs
      // the runtime registration at program start.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1845
// Emit ctor/dtor offload entries for a declare-target variable definition.
// On the device the actual ctor/dtor functions are generated; on the host
// only placeholder globals are created so the entries can be registered.
// Returns true when, for a device compilation, the variable itself must not
// be emitted (the caller uses the return to suppress host-side emission).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without any offloading targets.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism, not ctor/dtor entries.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive through optimization.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private placeholder global stands in for the ctor and
      // serves as the entry's unique ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, as for the constructor above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1960
// Return the address of a compiler-generated ("artificial") threadprivate
// variable identified by Name. Uses a real TLS global when the target
// supports it, otherwise goes through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Fast path: mark the global thread-local and return it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: each artificial variable also gets a "...cache." global
  // used by __kmpc_threadprivate_cached.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns an i8*; cast it back to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
1991
emitIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)1992 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1993 const RegionCodeGenTy &ThenGen,
1994 const RegionCodeGenTy &ElseGen) {
1995 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1996
1997 // If the condition constant folds and can be elided, try to avoid emitting
1998 // the condition and the dead arm of the if/else.
1999 bool CondConstant;
2000 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2001 if (CondConstant)
2002 ThenGen(CGF);
2003 else
2004 ElseGen(CGF);
2005 return;
2006 }
2007
2008 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2009 // emit the conditional branch.
2010 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2011 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2012 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2013 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2014
2015 // Emit the 'then' code.
2016 CGF.EmitBlock(ThenBlock);
2017 ThenGen(CGF);
2018 CGF.EmitBranch(ContBlock);
2019 // Emit the 'else' code if present.
2020 // There is no need to emit line number for unconditional branch.
2021 (void)ApplyDebugLocation::CreateEmpty(CGF);
2022 CGF.EmitBlock(ElseBlock);
2023 ElseGen(CGF);
2024 // There is no need to emit line number for unconditional branch.
2025 (void)ApplyDebugLocation::CreateEmpty(CGF);
2026 CGF.EmitBranch(ContBlock);
2027 // Emit the continuation block for code after the if.
2028 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2029 }
2030
// Emit a '#pragma omp parallel' region call. Without an if-clause (or when
// it is true) this is __kmpc_fork_call(loc, n, microtask, vars...); when the
// if-clause is false the region runs serialized on the current thread,
// bracketed by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    // Dynamic if-clause: branch between the forked and serialized forms.
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2092
2093 // If we're inside an (outlined) parallel region, use the region info's
2094 // thread-ID variable (it is passed in a first argument of the outlined function
2095 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2096 // regular serial code region, get thread ID by calling kmp_int32
2097 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2098 // return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)2099 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2100 SourceLocation Loc) {
2101 if (auto *OMPRegionInfo =
2102 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2103 if (OMPRegionInfo->getThreadIDVariable())
2104 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2105
2106 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2107 QualType Int32Ty =
2108 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2109 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2110 CGF.EmitStoreOfScalar(ThreadID,
2111 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2112
2113 return ThreadIDTemp;
2114 }
2115
getOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name,unsigned AddressSpace)2116 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2117 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2118 SmallString<256> Buffer;
2119 llvm::raw_svector_ostream Out(Buffer);
2120 Out << Name;
2121 StringRef RuntimeName = Out.str();
2122 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2123 if (Elem.second) {
2124 assert(Elem.second->getType()->getPointerElementType() == Ty &&
2125 "OMP internal variable has different type than requested");
2126 return &*Elem.second;
2127 }
2128
2129 return Elem.second = new llvm::GlobalVariable(
2130 CGM.getModule(), Ty, /*IsConstant*/ false,
2131 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2132 Elem.first(), /*InsertBefore=*/nullptr,
2133 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2134 }
2135
getCriticalRegionLock(StringRef CriticalName)2136 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2137 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2138 std::string Name = getName({Prefix, "var"});
2139 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2140 }
2141
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits EnterCallee(EnterArgs) before the region and ExitCallee(ExitArgs)
/// after it; with Conditional set, the region body runs only when the enter
/// call returns non-zero (e.g. __kmpc_master / __kmpc_single).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;   // Runtime entry emitted before the region.
  ArrayRef<llvm::Value *> EnterArgs;  // Arguments for the enter call.
  llvm::FunctionCallee ExitCallee;    // Runtime entry emitted after the region.
  ArrayRef<llvm::Value *> ExitArgs;   // Arguments for the exit call.
  bool Conditional;                   // Guard the region on the enter result.
  llvm::BasicBlock *ContBlock = nullptr; // Set by Enter() when Conditional.

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Skip the region body when the runtime call returned 0.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the conditional diamond opened by Enter(); only meaningful after a
  // Conditional Enter() has set ContBlock.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2180
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  // Arguments shared by enter and exit calls: location, thread id, named lock.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint expression (zero-extended to i32) is appended only to the
    // enter call, which then uses __kmpc_critical_with_hint.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  // Wrap the region body between the enter/exit runtime calls.
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2210
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the body and the end_master call are emitted under a
  // guard on the non-zero result of __kmpc_master.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional block opened by the enter call.
  Action.Done(CGF);
}
2233
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)2234 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2235 SourceLocation Loc) {
2236 if (!CGF.HaveInsertPoint())
2237 return;
2238 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2239 OMPBuilder.CreateTaskyield(CGF.Builder);
2240 } else {
2241 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2242 llvm::Value *Args[] = {
2243 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2244 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2245 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2246 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2247 Args);
2248 }
2249
2250 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2251 Region->emitUntiedSwitch(CGF);
2252 }
2253
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Non-conditional action: the body is always emitted between the two calls.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2273
2274 /// Given an array of pointers to variables, project the address of a
2275 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)2276 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2277 unsigned Index, const VarDecl *Var) {
2278 // Pull out the pointer to the variable.
2279 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2280 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2281
2282 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2283 Addr = CGF.Builder.CreateElementBitCast(
2284 Addr, CGF.ConvertTypeForMem(Var->getType()));
2285 return Addr;
2286 }
2287
/// Emit an internal function 'void copy_func(void *LHSArg, void *RHSArg)'
/// used by __kmpc_copyprivate: both arguments are cast to \p ArgsType
/// (pointer to a void*[n] array) and, for each copyprivate variable, the
/// value pointed to by RHS[I] is copied into LHS[I] using the corresponding
/// assignment expression.
/// \returns The newly created copy function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced via the copyprivate call.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Copy Src[I] into Dest[I] using the provided assignment expression.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2341
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // All four copyprivate-related arrays must describe the same variables.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records whether this thread executed the single region; it is only
  // needed when there are copyprivate variables to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the body runs only when __kmpc_single returns non-zero.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional 'then' block)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional block opened by the enter call.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2429
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2430 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2431 const RegionCodeGenTy &OrderedOpGen,
2432 SourceLocation Loc, bool IsThreads) {
2433 if (!CGF.HaveInsertPoint())
2434 return;
2435 // __kmpc_ordered(ident_t *, gtid);
2436 // OrderedOpGen();
2437 // __kmpc_end_ordered(ident_t *, gtid);
2438 // Prepare arguments and build a call to __kmpc_ordered
2439 if (IsThreads) {
2440 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2441 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2442 CGM.getModule(), OMPRTL___kmpc_ordered),
2443 Args,
2444 OMPBuilder.getOrCreateRuntimeFunction(
2445 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2446 Args);
2447 OrderedOpGen.setAction(Action);
2448 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2449 return;
2450 }
2451 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2452 }
2453
getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)2454 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2455 unsigned Flags;
2456 if (Kind == OMPD_for)
2457 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2458 else if (Kind == OMPD_sections)
2459 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2460 else if (Kind == OMPD_single)
2461 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2462 else if (Kind == OMPD_barrier)
2463 Flags = OMP_IDENT_BARRIER_EXPL;
2464 else
2465 Flags = OMP_IDENT_BARRIER_IMPL;
2466 return Flags;
2467 }
2468
getDefaultScheduleAndChunk(CodeGenFunction & CGF,const OMPLoopDirective & S,OpenMPScheduleClauseKind & ScheduleKind,const Expr * & ChunkExpr) const2469 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2470 CodeGenFunction &CGF, const OMPLoopDirective &S,
2471 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2472 // Check if the loop directive is actually a doacross loop directive. In this
2473 // case choose static, 1 schedule.
2474 if (llvm::any_of(
2475 S.getClausesOfKind<OMPOrderedClause>(),
2476 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2477 ScheduleKind = OMPC_SCHEDULE_static;
2478 // Chunk size is 1 in this case.
2479 llvm::APInt ChunkSize(32, 1);
2480 ChunkExpr = IntegerLiteral::Create(
2481 CGF.getContext(), ChunkSize,
2482 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2483 SourceLocation());
2484 }
2485 }
2486
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region (and unless a simple barrier was forced),
    // use the cancellation-aware barrier so a cancel request can be observed.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Default: plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2536
2537 /// Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)2538 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2539 bool Chunked, bool Ordered) {
2540 switch (ScheduleKind) {
2541 case OMPC_SCHEDULE_static:
2542 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2543 : (Ordered ? OMP_ord_static : OMP_sch_static);
2544 case OMPC_SCHEDULE_dynamic:
2545 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2546 case OMPC_SCHEDULE_guided:
2547 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2548 case OMPC_SCHEDULE_runtime:
2549 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2550 case OMPC_SCHEDULE_auto:
2551 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2552 case OMPC_SCHEDULE_unknown:
2553 assert(!Chunked && "chunk was specified but schedule kind not known");
2554 return Ordered ? OMP_ord_static : OMP_sch_static;
2555 }
2556 llvm_unreachable("Unexpected runtime schedule");
2557 }
2558
2559 /// Map the OpenMP distribute schedule to the runtime enumeration.
2560 static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked)2561 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2562 // only static is allowed for dist_schedule
2563 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2564 }
2565
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2566 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2567 bool Chunked) const {
2568 OpenMPSchedType Schedule =
2569 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2570 return Schedule == OMP_sch_static;
2571 }
2572
isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2573 bool CGOpenMPRuntime::isStaticNonchunked(
2574 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2575 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2576 return Schedule == OMP_dist_sch_static;
2577 }
2578
isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2579 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2580 bool Chunked) const {
2581 OpenMPSchedType Schedule =
2582 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2583 return Schedule == OMP_sch_static_chunked;
2584 }
2585
isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2586 bool CGOpenMPRuntime::isStaticChunked(
2587 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2588 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2589 return Schedule == OMP_dist_sch_static_chunked;
2590 }
2591
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const2592 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2593 OpenMPSchedType Schedule =
2594 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2595 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2596 return Schedule != OMP_sch_static;
2597 }
2598
addMonoNonMonoModifier(CodeGenModule & CGM,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2)2599 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2600 OpenMPScheduleClauseModifier M1,
2601 OpenMPScheduleClauseModifier M2) {
2602 int Modifier = 0;
2603 switch (M1) {
2604 case OMPC_SCHEDULE_MODIFIER_monotonic:
2605 Modifier = OMP_sch_modifier_monotonic;
2606 break;
2607 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2608 Modifier = OMP_sch_modifier_nonmonotonic;
2609 break;
2610 case OMPC_SCHEDULE_MODIFIER_simd:
2611 if (Schedule == OMP_sch_static_chunked)
2612 Schedule = OMP_sch_static_balanced_chunked;
2613 break;
2614 case OMPC_SCHEDULE_MODIFIER_last:
2615 case OMPC_SCHEDULE_MODIFIER_unknown:
2616 break;
2617 }
2618 switch (M2) {
2619 case OMPC_SCHEDULE_MODIFIER_monotonic:
2620 Modifier = OMP_sch_modifier_monotonic;
2621 break;
2622 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2623 Modifier = OMP_sch_modifier_nonmonotonic;
2624 break;
2625 case OMPC_SCHEDULE_MODIFIER_simd:
2626 if (Schedule == OMP_sch_static_chunked)
2627 Schedule = OMP_sch_static_balanced_chunked;
2628 break;
2629 case OMPC_SCHEDULE_MODIFIER_last:
2630 case OMPC_SCHEDULE_MODIFIER_unknown:
2631 break;
2632 }
2633 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2634 // If the static schedule kind is specified or if the ordered clause is
2635 // specified, and if the nonmonotonic modifier is not specified, the effect is
2636 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2637 // modifier is specified, the effect is as if the nonmonotonic modifier is
2638 // specified.
2639 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2640 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2641 Schedule == OMP_sch_static_balanced_chunked ||
2642 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2643 Schedule == OMP_dist_sch_static_chunked ||
2644 Schedule == OMP_dist_sch_static))
2645 Modifier = OMP_sch_modifier_nonmonotonic;
2646 }
2647 return Schedule | Modifier;
2648 }
2649
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Dispatch init is only used for dynamic-style schedules; static schedules
  // (unless ordered) must go through emitForStaticInit instead.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2682
/// Emit the actual call to the __kmpc_for_static_init_* entry point selected
/// by the caller. \p Schedule must already be one of the static schedule
/// kinds (asserted below); dynamic schedules use dispatch init instead.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only consistent with the non-chunked static kinds.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2731
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the location with the work kind (loop vs. sections) for the runtime.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // Use an artificial debug location for the runtime call.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2752
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  // Distribute regions are tagged with the DISTRIBUTE work flag and carry no
  // schedule modifiers.
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
2768
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)2769 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2770 SourceLocation Loc,
2771 OpenMPDirectiveKind DKind) {
2772 if (!CGF.HaveInsertPoint())
2773 return;
2774 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2775 llvm::Value *Args[] = {
2776 emitUpdateLocation(CGF, Loc,
2777 isOpenMPDistributeDirective(DKind)
2778 ? OMP_IDENT_WORK_DISTRIBUTE
2779 : isOpenMPLoopDirective(DKind)
2780 ? OMP_IDENT_WORK_LOOP
2781 : OMP_IDENT_WORK_SECTIONS),
2782 getThreadID(CGF, Loc)};
2783 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2784 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2785 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2786 Args);
2787 }
2788
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)2789 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2790 SourceLocation Loc,
2791 unsigned IVSize,
2792 bool IVSigned) {
2793 if (!CGF.HaveInsertPoint())
2794 return;
2795 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2796 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2797 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2798 }
2799
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // Convert the runtime's i32 result into a bool value for the caller.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2823
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)2824 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2825 llvm::Value *NumThreads,
2826 SourceLocation Loc) {
2827 if (!CGF.HaveInsertPoint())
2828 return;
2829 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2830 llvm::Value *Args[] = {
2831 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2832 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2833 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2834 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2835 Args);
2836 }
2837
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)2838 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2839 ProcBindKind ProcBind,
2840 SourceLocation Loc) {
2841 if (!CGF.HaveInsertPoint())
2842 return;
2843 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2844 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2845 llvm::Value *Args[] = {
2846 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2847 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2848 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2849 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2850 Args);
2851 }
2852
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc,llvm::AtomicOrdering AO)2853 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2854 SourceLocation Loc, llvm::AtomicOrdering AO) {
2855 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2856 OMPBuilder.CreateFlush(CGF.Builder);
2857 } else {
2858 if (!CGF.HaveInsertPoint())
2859 return;
2860 // Build call void __kmpc_flush(ident_t *loc)
2861 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2862 CGM.getModule(), OMPRTL___kmpc_flush),
2863 emitUpdateLocation(CGF, Loc));
2864 }
2865 }
2866
namespace {
/// Indexes of fields for type kmp_task_t.
/// The enumerators are used as record-field indexes, so their order matters.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2892
empty() const2893 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2894 return OffloadEntriesTargetRegion.empty() &&
2895 OffloadEntriesDeviceGlobalVar.empty();
2896 }
2897
2898 /// Initialize target region entry.
2899 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,unsigned Order)2900 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2901 StringRef ParentName, unsigned LineNum,
2902 unsigned Order) {
2903 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2904 "only required for the device "
2905 "code generation.");
2906 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2907 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2908 OMPTargetRegionEntryTargetRegion);
2909 ++OffloadingEntriesNum;
2910 }
2911
2912 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,llvm::Constant * Addr,llvm::Constant * ID,OMPTargetRegionEntryKind Flags)2913 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2914 StringRef ParentName, unsigned LineNum,
2915 llvm::Constant *Addr, llvm::Constant *ID,
2916 OMPTargetRegionEntryKind Flags) {
2917 // If we are emitting code for a target, the entry is already initialized,
2918 // only has to be registered.
2919 if (CGM.getLangOpts().OpenMPIsDevice) {
2920 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2921 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2922 DiagnosticsEngine::Error,
2923 "Unable to find target region on line '%0' in the device code.");
2924 CGM.getDiags().Report(DiagID) << LineNum;
2925 return;
2926 }
2927 auto &Entry =
2928 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2929 assert(Entry.isValid() && "Entry not initialized!");
2930 Entry.setAddress(Addr);
2931 Entry.setID(ID);
2932 Entry.setFlags(Flags);
2933 } else {
2934 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2935 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2936 ++OffloadingEntriesNum;
2937 }
2938 }
2939
hasTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum) const2940 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2941 unsigned DeviceID, unsigned FileID, StringRef ParentName,
2942 unsigned LineNum) const {
2943 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2944 if (PerDevice == OffloadEntriesTargetRegion.end())
2945 return false;
2946 auto PerFile = PerDevice->second.find(FileID);
2947 if (PerFile == PerDevice->second.end())
2948 return false;
2949 auto PerParentName = PerFile->second.find(ParentName);
2950 if (PerParentName == PerFile->second.end())
2951 return false;
2952 auto PerLine = PerParentName->second.find(LineNum);
2953 if (PerLine == PerParentName->second.end())
2954 return false;
2955 // Fail if this entry is already registered.
2956 if (PerLine->second.getAddress() || PerLine->second.getID())
2957 return false;
2958 return true;
2959 }
2960
actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy & Action)2961 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2962 const OffloadTargetRegionEntryInfoActTy &Action) {
2963 // Scan all target region entries and perform the provided action.
2964 for (const auto &D : OffloadEntriesTargetRegion)
2965 for (const auto &F : D.second)
2966 for (const auto &P : F.second)
2967 for (const auto &L : P.second)
2968 Action(D.first, F.first, P.first(), L.first, L.second);
2969 }
2970
/// Initialize a device global variable entry (device compilation only).
/// Creates a placeholder whose address/size/linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo().
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
2981
/// Register a device global variable entry. On the device side the entry must
/// have been initialized beforehand (from the host IR metadata); on the host
/// side a new entry is created unless one already exists for \p VarName.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE: operator[] default-constructs an entry if one is absent; the
    // assert below catches entries that were never initialized.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already registered; only fill in size/linkage if they were not
      // known yet (a zero size means no definition has been seen).
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Same update-in-place path as above for an already-known host entry.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First time this variable is seen on the host - create a new entry.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3021
3022 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy & Action)3023 actOnDeviceGlobalVarEntriesInfo(
3024 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3025 // Scan all target region entries and perform the provided action.
3026 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3027 Action(E.getKey(), E.getValue());
3028 }
3029
createOffloadEntry(llvm::Constant * ID,llvm::Constant * Addr,uint64_t Size,int32_t Flags,llvm::GlobalValue::LinkageTypes Linkage)3030 void CGOpenMPRuntime::createOffloadEntry(
3031 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3032 llvm::GlobalValue::LinkageTypes Linkage) {
3033 StringRef Name = Addr->getName();
3034 llvm::Module &M = CGM.getModule();
3035 llvm::LLVMContext &C = M.getContext();
3036
3037 // Create constant string with the name.
3038 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3039
3040 std::string StringName = getName({"omp_offloading", "entry_name"});
3041 auto *Str = new llvm::GlobalVariable(
3042 M, StrPtrInit->getType(), /*isConstant=*/true,
3043 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3044 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3045
3046 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3047 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3048 llvm::ConstantInt::get(CGM.SizeTy, Size),
3049 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3050 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3051 std::string EntryName = getName({"omp_offloading", "entry", ""});
3052 llvm::GlobalVariable *Entry = createGlobalStruct(
3053 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3054 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3055
3056 // The entry has to be created in the section the linker expects it to be.
3057 Entry->setSection("omp_offloading_entries");
3058 }
3059
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; filled in by the emitter
  // lambdas below, then consumed by the validation loop at the end.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // device/file IDs back to a file known to the source manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Validate every collected entry and emit the actual offload entry globals
  // for the ones that are complete.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3233
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read out of it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Read the Idx-th operand as an integer constant.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read the Idx-th operand as a string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operands depend on it
    // (see the emitter lambdas in createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3302
emitKmpRoutineEntryT(QualType KmpInt32Ty)3303 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3304 if (!KmpRoutineEntryPtrTy) {
3305 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3306 ASTContext &C = CGM.getContext();
3307 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3308 FunctionProtoType::ExtProtoInfo EPI;
3309 KmpRoutineEntryPtrQTy = C.getPointerType(
3310 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3311 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3312 }
3313 }
3314
getTgtOffloadEntryQTy()3315 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3316 // Make sure the type of the entry is already created. This is the type we
3317 // have to create:
3318 // struct __tgt_offload_entry{
3319 // void *addr; // Pointer to the offload entry info.
3320 // // (function or global)
3321 // char *name; // Name of the function or global.
3322 // size_t size; // Size of the entry info (0 if it a function).
3323 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3324 // int32_t reserved; // Reserved, to use by the runtime library.
3325 // };
3326 if (TgtOffloadEntryQTy.isNull()) {
3327 ASTContext &C = CGM.getContext();
3328 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3329 RD->startDefinition();
3330 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3331 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3332 addFieldToRecordDecl(C, RD, C.getSizeType());
3333 addFieldToRecordDecl(
3334 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3335 addFieldToRecordDecl(
3336 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3337 RD->completeDefinition();
3338 RD->addAttr(PackedAttr::CreateImplicit(C));
3339 TgtOffloadEntryQTy = C.getRecordType(RD);
3340 }
3341 return TgtOffloadEntryQTy;
3342 }
3343
namespace {
/// Bundles the AST nodes describing one privatized variable of a task
/// region: the reference to the original variable and the declarations of
/// its private copy.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Reference expression to the original variable.
  const Expr *OriginalRef = nullptr;
  // Declaration of the original variable.
  const VarDecl *Original = nullptr;
  // Declaration of the task-private copy.
  const VarDecl *PrivateCopy = nullptr;
  // Per-element initializer for the private copy, if any (presumably used
  // for firstprivate initialization - confirm at call sites).
  const VarDecl *PrivateElemInit = nullptr;
};
/// Pair of the required alignment and the descriptor of a private variable.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3357
3358 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)3359 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3360 if (!Privates.empty()) {
3361 ASTContext &C = CGM.getContext();
3362 // Build struct .kmp_privates_t. {
3363 // /* private vars */
3364 // };
3365 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3366 RD->startDefinition();
3367 for (const auto &Pair : Privates) {
3368 const VarDecl *VD = Pair.second.Original;
3369 QualType Type = VD->getType().getNonReferenceType();
3370 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3371 if (VD->hasAttrs()) {
3372 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3373 E(VD->getAttrs().end());
3374 I != E; ++I)
3375 FD->addAttr(*I);
3376 }
3377 }
3378 RD->completeDefinition();
3379 return RD;
3380 }
3381 return nullptr;
3382 }
3383
3384 static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule & CGM,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)3385 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3386 QualType KmpInt32Ty,
3387 QualType KmpRoutineEntryPointerQTy) {
3388 ASTContext &C = CGM.getContext();
3389 // Build struct kmp_task_t {
3390 // void * shareds;
3391 // kmp_routine_entry_t routine;
3392 // kmp_int32 part_id;
3393 // kmp_cmplrdata_t data1;
3394 // kmp_cmplrdata_t data2;
3395 // For taskloops additional fields:
3396 // kmp_uint64 lb;
3397 // kmp_uint64 ub;
3398 // kmp_int64 st;
3399 // kmp_int32 liter;
3400 // void * reductions;
3401 // };
3402 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3403 UD->startDefinition();
3404 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3405 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3406 UD->completeDefinition();
3407 QualType KmpCmplrdataTy = C.getRecordType(UD);
3408 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3409 RD->startDefinition();
3410 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3411 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3412 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3413 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3414 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3415 if (isOpenMPTaskLoopDirective(Kind)) {
3416 QualType KmpUInt64Ty =
3417 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3418 QualType KmpInt64Ty =
3419 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3420 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3421 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3422 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3423 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3424 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3425 }
3426 RD->completeDefinition();
3427 return RD;
3428 }
3429
3430 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)3431 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3432 ArrayRef<PrivateDataTy> Privates) {
3433 ASTContext &C = CGM.getContext();
3434 // Build struct kmp_task_t_with_privates {
3435 // kmp_task_t task_data;
3436 // .kmp_privates_t. privates;
3437 // };
3438 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3439 RD->startDefinition();
3440 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3441 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3442 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3443 RD->completeDefinition();
3444 return RD;
3445 }
3446
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st,
///   tt->task_data.liter, tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two implicit parameters: the global thread id and the
  // pointer to the task descriptor.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the dereferenced kmp_task_t_with_privates; Base is its
  // kmp_task_t field (the first field of the wrapper record).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by pointer so the callee can update it (untied tasks).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is field 1 of the wrapper, when present; otherwise a
  // null pointer is passed.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Append lb, ub, st, liter and reductions for taskloop directives.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3561
/// Emit a function that runs the destructors of the task-private variables:
/// for every field of the privates record whose type has a non-trivial
/// destruction kind, a destroy is emitted. Signature matches the task entry:
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates *).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task descriptor and step to its privates field (field 1
  // of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only fields whose type requires destruction get a destroy pushed.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3610
3611 /// Emit a privates mapping function for correct handling of private and
3612 /// firstprivate variables.
3613 /// \code
3614 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3615 /// **noalias priv1,..., <tyn> **noalias privn) {
3616 /// *priv1 = &.privates.priv1;
3617 /// ...;
3618 /// *privn = &.privates.privn;
3619 /// }
3620 /// \endcode
3621 static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule & CGM,SourceLocation Loc,ArrayRef<const Expr * > PrivateVars,ArrayRef<const Expr * > FirstprivateVars,ArrayRef<const Expr * > LastprivateVars,QualType PrivatesQTy,ArrayRef<PrivateDataTy> Privates)3622 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3623 ArrayRef<const Expr *> PrivateVars,
3624 ArrayRef<const Expr *> FirstprivateVars,
3625 ArrayRef<const Expr *> LastprivateVars,
3626 QualType PrivatesQTy,
3627 ArrayRef<PrivateDataTy> Privates) {
3628 ASTContext &C = CGM.getContext();
3629 FunctionArgList Args;
3630 ImplicitParamDecl TaskPrivatesArg(
3631 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3632 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3633 ImplicitParamDecl::Other);
3634 Args.push_back(&TaskPrivatesArg);
3635 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3636 unsigned Counter = 1;
3637 for (const Expr *E : PrivateVars) {
3638 Args.push_back(ImplicitParamDecl::Create(
3639 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3640 C.getPointerType(C.getPointerType(E->getType()))
3641 .withConst()
3642 .withRestrict(),
3643 ImplicitParamDecl::Other));
3644 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3645 PrivateVarsPos[VD] = Counter;
3646 ++Counter;
3647 }
3648 for (const Expr *E : FirstprivateVars) {
3649 Args.push_back(ImplicitParamDecl::Create(
3650 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3651 C.getPointerType(C.getPointerType(E->getType()))
3652 .withConst()
3653 .withRestrict(),
3654 ImplicitParamDecl::Other));
3655 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3656 PrivateVarsPos[VD] = Counter;
3657 ++Counter;
3658 }
3659 for (const Expr *E : LastprivateVars) {
3660 Args.push_back(ImplicitParamDecl::Create(
3661 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3662 C.getPointerType(C.getPointerType(E->getType()))
3663 .withConst()
3664 .withRestrict(),
3665 ImplicitParamDecl::Other));
3666 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3667 PrivateVarsPos[VD] = Counter;
3668 ++Counter;
3669 }
3670 const auto &TaskPrivatesMapFnInfo =
3671 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3672 llvm::FunctionType *TaskPrivatesMapTy =
3673 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3674 std::string Name =
3675 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3676 auto *TaskPrivatesMap = llvm::Function::Create(
3677 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3678 &CGM.getModule());
3679 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3680 TaskPrivatesMapFnInfo);
3681 if (CGM.getLangOpts().Optimize) {
3682 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3683 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3684 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3685 }
3686 CodeGenFunction CGF(CGM);
3687 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3688 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3689
3690 // *privi = &.privates.privi;
3691 LValue Base = CGF.EmitLoadOfPointerLValue(
3692 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3693 TaskPrivatesArg.getType()->castAs<PointerType>());
3694 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3695 Counter = 0;
3696 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3697 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3698 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3699 LValue RefLVal =
3700 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3701 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3702 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3703 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3704 ++Counter;
3705 }
3706 CGF.FinishFunction();
3707 return TaskPrivatesMap;
3708 }
3709
3710 /// Emit initialization for private variables in task-based directives.
emitPrivatesInit(CodeGenFunction & CGF,const OMPExecutableDirective & D,Address KmpTaskSharedsPtr,LValue TDBase,const RecordDecl * KmpTaskTWithPrivatesQTyRD,QualType SharedsTy,QualType SharedsPtrTy,const OMPTaskDataTy & Data,ArrayRef<PrivateDataTy> Privates,bool ForDup)3711 static void emitPrivatesInit(CodeGenFunction &CGF,
3712 const OMPExecutableDirective &D,
3713 Address KmpTaskSharedsPtr, LValue TDBase,
3714 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3715 QualType SharedsTy, QualType SharedsPtrTy,
3716 const OMPTaskDataTy &Data,
3717 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3718 ASTContext &C = CGF.getContext();
3719 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3720 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3721 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3722 ? OMPD_taskloop
3723 : OMPD_task;
3724 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3725 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3726 LValue SrcBase;
3727 bool IsTargetTask =
3728 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3729 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3730 // For target-based directives skip 3 firstprivate arrays BasePointersArray,
3731 // PointersArray and SizesArray. The original variables for these arrays are
3732 // not captured and we get their addresses explicitly.
3733 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3734 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3735 SrcBase = CGF.MakeAddrLValue(
3736 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3737 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3738 SharedsTy);
3739 }
3740 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3741 for (const PrivateDataTy &Pair : Privates) {
3742 const VarDecl *VD = Pair.second.PrivateCopy;
3743 const Expr *Init = VD->getAnyInitializer();
3744 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3745 !CGF.isTrivialInitializer(Init)))) {
3746 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3747 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3748 const VarDecl *OriginalVD = Pair.second.Original;
3749 // Check if the variable is the target-based BasePointersArray,
3750 // PointersArray or SizesArray.
3751 LValue SharedRefLValue;
3752 QualType Type = PrivateLValue.getType();
3753 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3754 if (IsTargetTask && !SharedField) {
3755 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3756 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3757 cast<CapturedDecl>(OriginalVD->getDeclContext())
3758 ->getNumParams() == 0 &&
3759 isa<TranslationUnitDecl>(
3760 cast<CapturedDecl>(OriginalVD->getDeclContext())
3761 ->getDeclContext()) &&
3762 "Expected artificial target data variable.");
3763 SharedRefLValue =
3764 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3765 } else if (ForDup) {
3766 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3767 SharedRefLValue = CGF.MakeAddrLValue(
3768 Address(SharedRefLValue.getPointer(CGF),
3769 C.getDeclAlign(OriginalVD)),
3770 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3771 SharedRefLValue.getTBAAInfo());
3772 } else if (CGF.LambdaCaptureFields.count(
3773 Pair.second.Original->getCanonicalDecl()) > 0 ||
3774 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3775 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3776 } else {
3777 // Processing for implicitly captured variables.
3778 InlinedOpenMPRegionRAII Region(
3779 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3780 /*HasCancel=*/false);
3781 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3782 }
3783 if (Type->isArrayType()) {
3784 // Initialize firstprivate array.
3785 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3786 // Perform simple memcpy.
3787 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3788 } else {
3789 // Initialize firstprivate array using element-by-element
3790 // initialization.
3791 CGF.EmitOMPAggregateAssign(
3792 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3793 Type,
3794 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3795 Address SrcElement) {
3796 // Clean up any temporaries needed by the initialization.
3797 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3798 InitScope.addPrivate(
3799 Elem, [SrcElement]() -> Address { return SrcElement; });
3800 (void)InitScope.Privatize();
3801 // Emit initialization for single element.
3802 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3803 CGF, &CapturesInfo);
3804 CGF.EmitAnyExprToMem(Init, DestElement,
3805 Init->getType().getQualifiers(),
3806 /*IsInitializer=*/false);
3807 });
3808 }
3809 } else {
3810 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3811 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3812 return SharedRefLValue.getAddress(CGF);
3813 });
3814 (void)InitScope.Privatize();
3815 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3816 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3817 /*capturedByInit=*/false);
3818 }
3819 } else {
3820 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3821 }
3822 }
3823 ++FI;
3824 }
3825 }
3826
3827 /// Check if duplication function is required for taskloops.
checkInitIsRequired(CodeGenFunction & CGF,ArrayRef<PrivateDataTy> Privates)3828 static bool checkInitIsRequired(CodeGenFunction &CGF,
3829 ArrayRef<PrivateDataTy> Privates) {
3830 bool InitRequired = false;
3831 for (const PrivateDataTy &Pair : Privates) {
3832 const VarDecl *VD = Pair.second.PrivateCopy;
3833 const Expr *Init = VD->getAnyInitializer();
3834 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3835 !CGF.isTrivialInitializer(Init));
3836 if (InitRequired)
3837 break;
3838 }
3839 return InitRequired;
3840 }
3841
3842
3843 /// Emit task_dup function (for initialization of
3844 /// private/firstprivate/lastprivate vars and last_iter flag)
3845 /// \code
3846 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3847 /// lastpriv) {
3848 /// // setup lastprivate flag
3849 /// task_dst->last = lastpriv;
3850 /// // could be constructor calls here...
3851 /// }
3852 /// \endcode
3853 static llvm::Value *
emitTaskDupFunction(CodeGenModule & CGM,SourceLocation Loc,const OMPExecutableDirective & D,QualType KmpTaskTWithPrivatesPtrQTy,const RecordDecl * KmpTaskTWithPrivatesQTyRD,const RecordDecl * KmpTaskTQTyRD,QualType SharedsTy,QualType SharedsPtrTy,const OMPTaskDataTy & Data,ArrayRef<PrivateDataTy> Privates,bool WithLastIter)3854 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3855 const OMPExecutableDirective &D,
3856 QualType KmpTaskTWithPrivatesPtrQTy,
3857 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3858 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3859 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3860 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3861 ASTContext &C = CGM.getContext();
3862 FunctionArgList Args;
3863 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3864 KmpTaskTWithPrivatesPtrQTy,
3865 ImplicitParamDecl::Other);
3866 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3867 KmpTaskTWithPrivatesPtrQTy,
3868 ImplicitParamDecl::Other);
3869 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3870 ImplicitParamDecl::Other);
3871 Args.push_back(&DstArg);
3872 Args.push_back(&SrcArg);
3873 Args.push_back(&LastprivArg);
3874 const auto &TaskDupFnInfo =
3875 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3876 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3877 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3878 auto *TaskDup = llvm::Function::Create(
3879 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3880 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3881 TaskDup->setDoesNotRecurse();
3882 CodeGenFunction CGF(CGM);
3883 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3884 Loc);
3885
3886 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3887 CGF.GetAddrOfLocalVar(&DstArg),
3888 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3889 // task_dst->liter = lastpriv;
3890 if (WithLastIter) {
3891 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3892 LValue Base = CGF.EmitLValueForField(
3893 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3894 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3895 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3896 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3897 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3898 }
3899
3900 // Emit initial values for private copies (if any).
3901 assert(!Privates.empty());
3902 Address KmpTaskSharedsPtr = Address::invalid();
3903 if (!Data.FirstprivateVars.empty()) {
3904 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3905 CGF.GetAddrOfLocalVar(&SrcArg),
3906 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3907 LValue Base = CGF.EmitLValueForField(
3908 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3909 KmpTaskSharedsPtr = Address(
3910 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3911 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3912 KmpTaskTShareds)),
3913 Loc),
3914 CGM.getNaturalTypeAlignment(SharedsTy));
3915 }
3916 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3917 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3918 CGF.FinishFunction();
3919 return TaskDup;
3920 }
3921
3922 /// Checks if destructor function is required to be generated.
3923 /// \return true if cleanups are required, false otherwise.
3924 static bool
checkDestructorsRequired(const RecordDecl * KmpTaskTWithPrivatesQTyRD)3925 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
3926 bool NeedsCleanup = false;
3927 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3928 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
3929 for (const FieldDecl *FD : PrivateRD->fields()) {
3930 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
3931 if (NeedsCleanup)
3932 break;
3933 }
3934 return NeedsCleanup;
3935 }
3936
3937 namespace {
3938 /// Loop generator for OpenMP iterator expression.
3939 class OMPIteratorGeneratorScope final
3940 : public CodeGenFunction::OMPPrivateScope {
3941 CodeGenFunction &CGF;
3942 const OMPIteratorExpr *E = nullptr;
3943 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3944 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3945 OMPIteratorGeneratorScope() = delete;
3946 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3947
3948 public:
OMPIteratorGeneratorScope(CodeGenFunction & CGF,const OMPIteratorExpr * E)3949 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3950 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3951 if (!E)
3952 return;
3953 SmallVector<llvm::Value *, 4> Uppers;
3954 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3955 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3956 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3957 addPrivate(VD, [&CGF, VD]() {
3958 return CGF.CreateMemTemp(VD->getType(), VD->getName());
3959 });
3960 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3961 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
3962 return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
3963 "counter.addr");
3964 });
3965 }
3966 Privatize();
3967
3968 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3969 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3970 LValue CLVal =
3971 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3972 HelperData.CounterVD->getType());
3973 // Counter = 0;
3974 CGF.EmitStoreOfScalar(
3975 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3976 CLVal);
3977 CodeGenFunction::JumpDest &ContDest =
3978 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3979 CodeGenFunction::JumpDest &ExitDest =
3980 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3981 // N = <number-of_iterations>;
3982 llvm::Value *N = Uppers[I];
3983 // cont:
3984 // if (Counter < N) goto body; else goto exit;
3985 CGF.EmitBlock(ContDest.getBlock());
3986 auto *CVal =
3987 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3988 llvm::Value *Cmp =
3989 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3990 ? CGF.Builder.CreateICmpSLT(CVal, N)
3991 : CGF.Builder.CreateICmpULT(CVal, N);
3992 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3993 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3994 // body:
3995 CGF.EmitBlock(BodyBB);
3996 // Iteri = Begini + Counter * Stepi;
3997 CGF.EmitIgnoredExpr(HelperData.Update);
3998 }
3999 }
~OMPIteratorGeneratorScope()4000 ~OMPIteratorGeneratorScope() {
4001 if (!E)
4002 return;
4003 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4004 // Counter = Counter + 1;
4005 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4006 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4007 // goto cont;
4008 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4009 // exit:
4010 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4011 }
4012 }
4013 };
4014 } // namespace
4015
4016 static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction & CGF,const Expr * E)4017 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4018 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4019 llvm::Value *Addr;
4020 if (OASE) {
4021 const Expr *Base = OASE->getBase();
4022 Addr = CGF.EmitScalarExpr(Base);
4023 } else {
4024 Addr = CGF.EmitLValue(E).getPointer(CGF);
4025 }
4026 llvm::Value *SizeVal;
4027 QualType Ty = E->getType();
4028 if (OASE) {
4029 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4030 for (const Expr *SE : OASE->getDimensions()) {
4031 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4032 Sz = CGF.EmitScalarConversion(
4033 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4034 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4035 }
4036 } else if (const auto *ASE =
4037 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4038 LValue UpAddrLVal =
4039 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4040 llvm::Value *UpAddr =
4041 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4042 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4043 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4044 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4045 } else {
4046 SizeVal = CGF.getTypeSize(Ty);
4047 }
4048 return std::make_pair(Addr, SizeVal);
4049 }
4050
4051 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getKmpAffinityType(ASTContext & C,QualType & KmpTaskAffinityInfoTy)4052 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4053 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4054 if (KmpTaskAffinityInfoTy.isNull()) {
4055 RecordDecl *KmpAffinityInfoRD =
4056 C.buildImplicitRecord("kmp_task_affinity_info_t");
4057 KmpAffinityInfoRD->startDefinition();
4058 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4059 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4060 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4061 KmpAffinityInfoRD->completeDefinition();
4062 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4063 }
4064 }
4065
4066 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)4067 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4068 const OMPExecutableDirective &D,
4069 llvm::Function *TaskFunction, QualType SharedsTy,
4070 Address Shareds, const OMPTaskDataTy &Data) {
4071 ASTContext &C = CGM.getContext();
4072 llvm::SmallVector<PrivateDataTy, 4> Privates;
4073 // Aggregate privates and sort them by the alignment.
4074 const auto *I = Data.PrivateCopies.begin();
4075 for (const Expr *E : Data.PrivateVars) {
4076 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4077 Privates.emplace_back(
4078 C.getDeclAlign(VD),
4079 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4080 /*PrivateElemInit=*/nullptr));
4081 ++I;
4082 }
4083 I = Data.FirstprivateCopies.begin();
4084 const auto *IElemInitRef = Data.FirstprivateInits.begin();
4085 for (const Expr *E : Data.FirstprivateVars) {
4086 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4087 Privates.emplace_back(
4088 C.getDeclAlign(VD),
4089 PrivateHelpersTy(
4090 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4091 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4092 ++I;
4093 ++IElemInitRef;
4094 }
4095 I = Data.LastprivateCopies.begin();
4096 for (const Expr *E : Data.LastprivateVars) {
4097 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4098 Privates.emplace_back(
4099 C.getDeclAlign(VD),
4100 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4101 /*PrivateElemInit=*/nullptr));
4102 ++I;
4103 }
4104 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4105 return L.first > R.first;
4106 });
4107 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4108 // Build type kmp_routine_entry_t (if not built yet).
4109 emitKmpRoutineEntryT(KmpInt32Ty);
4110 // Build type kmp_task_t (if not built yet).
4111 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4112 if (SavedKmpTaskloopTQTy.isNull()) {
4113 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4114 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4115 }
4116 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4117 } else {
4118 assert((D.getDirectiveKind() == OMPD_task ||
4119 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4120 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4121 "Expected taskloop, task or target directive");
4122 if (SavedKmpTaskTQTy.isNull()) {
4123 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4124 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4125 }
4126 KmpTaskTQTy = SavedKmpTaskTQTy;
4127 }
4128 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4129 // Build particular struct kmp_task_t for the given task.
4130 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4131 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4132 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4133 QualType KmpTaskTWithPrivatesPtrQTy =
4134 C.getPointerType(KmpTaskTWithPrivatesQTy);
4135 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4136 llvm::Type *KmpTaskTWithPrivatesPtrTy =
4137 KmpTaskTWithPrivatesTy->getPointerTo();
4138 llvm::Value *KmpTaskTWithPrivatesTySize =
4139 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4140 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4141
4142 // Emit initial values for private copies (if any).
4143 llvm::Value *TaskPrivatesMap = nullptr;
4144 llvm::Type *TaskPrivatesMapTy =
4145 std::next(TaskFunction->arg_begin(), 3)->getType();
4146 if (!Privates.empty()) {
4147 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4148 TaskPrivatesMap = emitTaskPrivateMappingFunction(
4149 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4150 FI->getType(), Privates);
4151 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4152 TaskPrivatesMap, TaskPrivatesMapTy);
4153 } else {
4154 TaskPrivatesMap = llvm::ConstantPointerNull::get(
4155 cast<llvm::PointerType>(TaskPrivatesMapTy));
4156 }
4157 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4158 // kmp_task_t *tt);
4159 llvm::Function *TaskEntry = emitProxyTaskFunction(
4160 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4161 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4162 TaskPrivatesMap);
4163
4164 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4165 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4166 // kmp_routine_entry_t *task_entry);
4167 // Task flags. Format is taken from
4168 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
4169 // description of kmp_tasking_flags struct.
4170 enum {
4171 TiedFlag = 0x1,
4172 FinalFlag = 0x2,
4173 DestructorsFlag = 0x8,
4174 PriorityFlag = 0x20,
4175 DetachableFlag = 0x40,
4176 };
4177 unsigned Flags = Data.Tied ? TiedFlag : 0;
4178 bool NeedsCleanup = false;
4179 if (!Privates.empty()) {
4180 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4181 if (NeedsCleanup)
4182 Flags = Flags | DestructorsFlag;
4183 }
4184 if (Data.Priority.getInt())
4185 Flags = Flags | PriorityFlag;
4186 if (D.hasClausesOfKind<OMPDetachClause>())
4187 Flags = Flags | DetachableFlag;
4188 llvm::Value *TaskFlags =
4189 Data.Final.getPointer()
4190 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4191 CGF.Builder.getInt32(FinalFlag),
4192 CGF.Builder.getInt32(/*C=*/0))
4193 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4194 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4195 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4196 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4197 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4198 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4199 TaskEntry, KmpRoutineEntryPtrTy)};
4200 llvm::Value *NewTask;
4201 if (D.hasClausesOfKind<OMPNowaitClause>()) {
4202 // Check if we have any device clause associated with the directive.
4203 const Expr *Device = nullptr;
4204 if (auto *C = D.getSingleClause<OMPDeviceClause>())
4205 Device = C->getDevice();
4206 // Emit device ID if any otherwise use default value.
4207 llvm::Value *DeviceID;
4208 if (Device)
4209 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4210 CGF.Int64Ty, /*isSigned=*/true);
4211 else
4212 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4213 AllocArgs.push_back(DeviceID);
4214 NewTask = CGF.EmitRuntimeCall(
4215 OMPBuilder.getOrCreateRuntimeFunction(
4216 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4217 AllocArgs);
4218 } else {
4219 NewTask =
4220 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4221 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4222 AllocArgs);
4223 }
4224 // Emit detach clause initialization.
4225 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4226 // task_descriptor);
4227 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4228 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4229 LValue EvtLVal = CGF.EmitLValue(Evt);
4230
4231 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4232 // int gtid, kmp_task_t *task);
4233 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4234 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4235 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4236 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4237 OMPBuilder.getOrCreateRuntimeFunction(
4238 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4239 {Loc, Tid, NewTask});
4240 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4241 Evt->getExprLoc());
4242 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4243 }
4244 // Process affinity clauses.
4245 if (D.hasClausesOfKind<OMPAffinityClause>()) {
4246 // Process list of affinity data.
4247 ASTContext &C = CGM.getContext();
4248 Address AffinitiesArray = Address::invalid();
4249 // Calculate number of elements to form the array of affinity data.
4250 llvm::Value *NumOfElements = nullptr;
4251 unsigned NumAffinities = 0;
4252 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4253 if (const Expr *Modifier = C->getModifier()) {
4254 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4255 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4256 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4257 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4258 NumOfElements =
4259 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4260 }
4261 } else {
4262 NumAffinities += C->varlist_size();
4263 }
4264 }
4265 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4266 // Fields ids in kmp_task_affinity_info record.
4267 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4268
4269 QualType KmpTaskAffinityInfoArrayTy;
4270 if (NumOfElements) {
4271 NumOfElements = CGF.Builder.CreateNUWAdd(
4272 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4273 OpaqueValueExpr OVE(
4274 Loc,
4275 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4276 VK_RValue);
4277 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4278 RValue::get(NumOfElements));
4279 KmpTaskAffinityInfoArrayTy =
4280 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4281 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4282 // Properly emit variable-sized array.
4283 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4284 ImplicitParamDecl::Other);
4285 CGF.EmitVarDecl(*PD);
4286 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4287 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4288 /*isSigned=*/false);
4289 } else {
4290 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4291 KmpTaskAffinityInfoTy,
4292 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4293 ArrayType::Normal, /*IndexTypeQuals=*/0);
4294 AffinitiesArray =
4295 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4296 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4297 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4298 /*isSigned=*/false);
4299 }
4300
4301 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4302 // Fill array by elements without iterators.
4303 unsigned Pos = 0;
4304 bool HasIterator = false;
4305 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4306 if (C->getModifier()) {
4307 HasIterator = true;
4308 continue;
4309 }
4310 for (const Expr *E : C->varlists()) {
4311 llvm::Value *Addr;
4312 llvm::Value *Size;
4313 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4314 LValue Base =
4315 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4316 KmpTaskAffinityInfoTy);
4317 // affs[i].base_addr = &<Affinities[i].second>;
4318 LValue BaseAddrLVal = CGF.EmitLValueForField(
4319 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4320 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4321 BaseAddrLVal);
4322 // affs[i].len = sizeof(<Affinities[i].second>);
4323 LValue LenLVal = CGF.EmitLValueForField(
4324 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4325 CGF.EmitStoreOfScalar(Size, LenLVal);
4326 ++Pos;
4327 }
4328 }
4329 LValue PosLVal;
4330 if (HasIterator) {
4331 PosLVal = CGF.MakeAddrLValue(
4332 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4333 C.getSizeType());
4334 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4335 }
4336 // Process elements with iterators.
4337 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4338 const Expr *Modifier = C->getModifier();
4339 if (!Modifier)
4340 continue;
4341 OMPIteratorGeneratorScope IteratorScope(
4342 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4343 for (const Expr *E : C->varlists()) {
4344 llvm::Value *Addr;
4345 llvm::Value *Size;
4346 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4347 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4348 LValue Base = CGF.MakeAddrLValue(
4349 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4350 AffinitiesArray.getAlignment()),
4351 KmpTaskAffinityInfoTy);
4352 // affs[i].base_addr = &<Affinities[i].second>;
4353 LValue BaseAddrLVal = CGF.EmitLValueForField(
4354 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4355 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4356 BaseAddrLVal);
4357 // affs[i].len = sizeof(<Affinities[i].second>);
4358 LValue LenLVal = CGF.EmitLValueForField(
4359 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4360 CGF.EmitStoreOfScalar(Size, LenLVal);
4361 Idx = CGF.Builder.CreateNUWAdd(
4362 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4363 CGF.EmitStoreOfScalar(Idx, PosLVal);
4364 }
4365 }
4366 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4367 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4368 // naffins, kmp_task_affinity_info_t *affin_list);
4369 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4370 llvm::Value *GTid = getThreadID(CGF, Loc);
4371 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4372 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4373 // FIXME: Emit the function and ignore its result for now unless the
4374 // runtime function is properly implemented.
4375 (void)CGF.EmitRuntimeCall(
4376 OMPBuilder.getOrCreateRuntimeFunction(
4377 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4378 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4379 }
4380 llvm::Value *NewTaskNewTaskTTy =
4381 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4382 NewTask, KmpTaskTWithPrivatesPtrTy);
4383 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4384 KmpTaskTWithPrivatesQTy);
4385 LValue TDBase =
4386 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4387 // Fill the data in the resulting kmp_task_t record.
4388 // Copy shareds if there are any.
4389 Address KmpTaskSharedsPtr = Address::invalid();
4390 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4391 KmpTaskSharedsPtr =
4392 Address(CGF.EmitLoadOfScalar(
4393 CGF.EmitLValueForField(
4394 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4395 KmpTaskTShareds)),
4396 Loc),
4397 CGM.getNaturalTypeAlignment(SharedsTy));
4398 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4399 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4400 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4401 }
4402 // Emit initial values for private copies (if any).
4403 TaskResultTy Result;
4404 if (!Privates.empty()) {
4405 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4406 SharedsTy, SharedsPtrTy, Data, Privates,
4407 /*ForDup=*/false);
4408 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4409 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4410 Result.TaskDupFn = emitTaskDupFunction(
4411 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4412 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4413 /*WithLastIter=*/!Data.LastprivateVars.empty());
4414 }
4415 }
4416 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4417 enum { Priority = 0, Destructors = 1 };
4418 // Provide pointer to function with destructors for privates.
4419 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4420 const RecordDecl *KmpCmplrdataUD =
4421 (*FI)->getType()->getAsUnionType()->getDecl();
4422 if (NeedsCleanup) {
4423 llvm::Value *DestructorFn = emitDestructorsFunction(
4424 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4425 KmpTaskTWithPrivatesQTy);
4426 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4427 LValue DestructorsLV = CGF.EmitLValueForField(
4428 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4429 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4430 DestructorFn, KmpRoutineEntryPtrTy),
4431 DestructorsLV);
4432 }
4433 // Set priority.
4434 if (Data.Priority.getInt()) {
4435 LValue Data2LV = CGF.EmitLValueForField(
4436 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4437 LValue PriorityLV = CGF.EmitLValueForField(
4438 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4439 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4440 }
4441 Result.NewTask = NewTask;
4442 Result.TaskEntry = TaskEntry;
4443 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4444 Result.TDBase = TDBase;
4445 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4446 return Result;
4447 }
4448
namespace {
/// Dependence kind for RTL. The numeric values encode the dependence flags
/// stored in the kmp_depend_info record consumed by the OpenMP runtime
/// (in = 0x1; in|out bits combined = 0x3) -- verify against the runtime's
/// kmp.h before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record: base address of the dependent
/// entity, its length in bytes, and the dependence-kind flags. The order
/// must match the field order built in getDependTypes() below.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4459
4460 /// Translates internal dependency kind into the runtime kind.
translateDependencyKind(OpenMPDependClauseKind K)4461 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4462 RTLDependenceKindTy DepKind;
4463 switch (K) {
4464 case OMPC_DEPEND_in:
4465 DepKind = DepIn;
4466 break;
4467 // Out and InOut dependencies must use the same code.
4468 case OMPC_DEPEND_out:
4469 case OMPC_DEPEND_inout:
4470 DepKind = DepInOut;
4471 break;
4472 case OMPC_DEPEND_mutexinoutset:
4473 DepKind = DepMutexInOutSet;
4474 break;
4475 case OMPC_DEPEND_source:
4476 case OMPC_DEPEND_sink:
4477 case OMPC_DEPEND_depobj:
4478 case OMPC_DEPEND_unknown:
4479 llvm_unreachable("Unknown task dependence type");
4480 }
4481 return DepKind;
4482 }
4483
4484 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getDependTypes(ASTContext & C,QualType & KmpDependInfoTy,QualType & FlagsTy)4485 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4486 QualType &FlagsTy) {
4487 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4488 if (KmpDependInfoTy.isNull()) {
4489 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4490 KmpDependInfoRD->startDefinition();
4491 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4492 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4493 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4494 KmpDependInfoRD->completeDefinition();
4495 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4496 }
4497 }
4498
/// Returns the number of kmp_depend_info elements stored in the depobj
/// referenced by \p DepobjLVal, together with an lvalue addressing the first
/// of those elements. The count is read from the extra record placed in
/// front of the array by emitDepobjDependClause().
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the first dependency
  // record; load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element: the record preceding the array stores the number
  // of dependencies in its base_addr field.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4527
/// Fills kmp_depend_info records for every dependency expression in \p Data
/// into \p DependenciesArray. \p Pos is either a compile-time counter
/// (unsigned*), used when no iterator modifier is involved, or an lvalue
/// holding a runtime counter, used when the records are emitted inside
/// iterator-generated loops. The counter is advanced past the emitted
/// records in either case.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // With an iterator modifier the loop below is emitted inside the
  // iterator-generated loop nest, so each record is emitted once per
  // iteration at run time.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time position: index with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the counter and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter (compile-time or runtime form).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4586
/// Computes, for each depobj expression in \p Data, the total number of
/// kmp_depend_info elements stored in that depobj, and returns the totals
/// as runtime values (one per dependency expression).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // With an iterator modifier, the loads/adds below execute inside the
    // iterator-generated loop nest, so the totals accumulate across all
    // iterations at run time.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      // The depobj variable holds a void* to the first dependency record.
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The record in front of the array stores the element count in its
      // base_addr field.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a temporary that InitTempAlloca zero-initializes at
      // function entry, so the running total survives the iterator loops.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the final totals after the iterator scope (and its loops) closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4644
/// Copies the kmp_depend_info records stored in each depobj of \p Data into
/// \p DependenciesArray at the runtime position held in \p PosLVal, and
/// advances that position by the number of copied records.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // With an iterator modifier, the copy below runs once per iteration of
    // the iterator-generated loop nest.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      // The depobj variable holds a void* to the first dependency record.
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps whole records at once.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4705
/// Emits the runtime dependency array for a task's 'depend' clauses.
/// Returns the number of elements (as an i32 value) and the array address
/// cast to void*, or {nullptr, invalid} when there are no dependencies.
/// Records are laid out in three passes: plain dependencies first, then
/// iterator-based ones, then the contents of depobj objects.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count the dependencies whose number is known at compile time: neither
  // depobj nor iterator-based.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Sum the runtime sizes stored inside each depobj.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // The iteration count is the product of the upper bounds of all
      // iterators in the modifier.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at run time: emit a VLA for the array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the computed size in an OpaqueValueExpr so it can serve as the
    // VLA size expression.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // All counts are compile-time constants: use a plain constant array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: plain dependencies (no depobj, no iterators), tracked with a
  // compile-time position.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // Pass 2: switch to a runtime counter seeded with the compile-time
  // position reached so far.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Pass 3: copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4826
/// Allocates and fills the dependency array backing an 'omp depobj'
/// construct. The array is heap-allocated via __kmpc_alloc with one extra
/// leading record whose base_addr field stores the number of dependencies
/// (needed by 'depobj(x) update(...)'). Returns the address of the first
/// real dependency record (one past the counter record), cast to void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: the element count is the runtime product of the
    // iterators' upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading counter record.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time element count; again reserve one extra leading record.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 is the counter record);
  // iterator-based dependencies need a runtime counter instead.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record, past the counter.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4909
emitDestroyClause(CodeGenFunction & CGF,LValue DepobjLVal,SourceLocation Loc)4910 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4911 SourceLocation Loc) {
4912 ASTContext &C = CGM.getContext();
4913 QualType FlagsTy;
4914 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4915 LValue Base = CGF.EmitLoadOfPointerLValue(
4916 DepobjLVal.getAddress(CGF),
4917 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4918 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4919 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4920 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4921 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4922 Addr.getPointer(),
4923 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4924 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4925 CGF.VoidPtrTy);
4926 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4927 // Use default allocator.
4928 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4929 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4930
4931 // _kmpc_free(gtid, addr, nullptr);
4932 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4933 CGM.getModule(), OMPRTL___kmpc_free),
4934 Args);
4935 }
4936
/// Emits code for the 'depobj' update clause: rewrites the flags field of
/// every dependency record stored in the depobj to \p NewDepKind, using a
/// pointer-PHI do-while loop over the records.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: the array start on entry, the
  // advanced pointer on the back-edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // NOTE(review): the body executes at least once, i.e. this assumes the
  // depobj holds at least one record -- confirm against the depobj emitter.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4982
emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)4983 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4984 const OMPExecutableDirective &D,
4985 llvm::Function *TaskFunction,
4986 QualType SharedsTy, Address Shareds,
4987 const Expr *IfCond,
4988 const OMPTaskDataTy &Data) {
4989 if (!CGF.HaveInsertPoint())
4990 return;
4991
4992 TaskResultTy Result =
4993 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4994 llvm::Value *NewTask = Result.NewTask;
4995 llvm::Function *TaskEntry = Result.TaskEntry;
4996 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4997 LValue TDBase = Result.TDBase;
4998 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4999 // Process list of dependences.
5000 Address DependenciesArray = Address::invalid();
5001 llvm::Value *NumOfElements;
5002 std::tie(NumOfElements, DependenciesArray) =
5003 emitDependClause(CGF, Data.Dependences, Loc);
5004
5005 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5006 // libcall.
5007 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5008 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5009 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5010 // list is not empty
5011 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5012 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5013 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5014 llvm::Value *DepTaskArgs[7];
5015 if (!Data.Dependences.empty()) {
5016 DepTaskArgs[0] = UpLoc;
5017 DepTaskArgs[1] = ThreadID;
5018 DepTaskArgs[2] = NewTask;
5019 DepTaskArgs[3] = NumOfElements;
5020 DepTaskArgs[4] = DependenciesArray.getPointer();
5021 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5022 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5023 }
5024 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5025 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5026 if (!Data.Tied) {
5027 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5028 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5029 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5030 }
5031 if (!Data.Dependences.empty()) {
5032 CGF.EmitRuntimeCall(
5033 OMPBuilder.getOrCreateRuntimeFunction(
5034 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5035 DepTaskArgs);
5036 } else {
5037 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5038 CGM.getModule(), OMPRTL___kmpc_omp_task),
5039 TaskArgs);
5040 }
5041 // Check if parent region is untied and build return for untied task;
5042 if (auto *Region =
5043 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5044 Region->emitUntiedSwitch(CGF);
5045 };
5046
5047 llvm::Value *DepWaitTaskArgs[6];
5048 if (!Data.Dependences.empty()) {
5049 DepWaitTaskArgs[0] = UpLoc;
5050 DepWaitTaskArgs[1] = ThreadID;
5051 DepWaitTaskArgs[2] = NumOfElements;
5052 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5053 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5054 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5055 }
5056 auto &M = CGM.getModule();
5057 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5058 TaskEntry, &Data, &DepWaitTaskArgs,
5059 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5060 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5061 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5062 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5063 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5064 // is specified.
5065 if (!Data.Dependences.empty())
5066 CGF.EmitRuntimeCall(
5067 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5068 DepWaitTaskArgs);
5069 // Call proxy_task_entry(gtid, new_task);
5070 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5071 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5072 Action.Enter(CGF);
5073 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5074 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5075 OutlinedFnArgs);
5076 };
5077
5078 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5079 // kmp_task_t *new_task);
5080 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5081 // kmp_task_t *new_task);
5082 RegionCodeGenTy RCG(CodeGen);
5083 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5084 M, OMPRTL___kmpc_omp_task_begin_if0),
5085 TaskArgs,
5086 OMPBuilder.getOrCreateRuntimeFunction(
5087 M, OMPRTL___kmpc_omp_task_complete_if0),
5088 TaskArgs);
5089 RCG.setAction(Action);
5090 RCG(CGF);
5091 };
5092
5093 if (IfCond) {
5094 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5095 } else {
5096 RegionCodeGenTy ThenRCG(ThenCodeGen);
5097 ThenRCG(CGF);
5098 }
5099 }
5100
/// Emits a call to the '__kmpc_taskloop' runtime entry for a 'taskloop'
/// directive: allocates/initializes the task object via emitTaskInit(), fills
/// in the loop-bound, stride and reductions fields of the kmp_task_t, and then
/// issues the runtime call with the schedule/grainsize encoding.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Create the task object (kmp_task_t) and the task entry/dup functions.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluated 'if' clause condition, or constant 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the 'lb' field of the task descriptor from the directive's
  // lower-bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the 'ub' field likewise.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the 'st' (stride) field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No task reductions: zero the field so the runtime sees a null pointer.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: the PointerIntPair in Data.Schedule encodes both whether a
      // schedule clause was present (pointer) and its kind (int bit).
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value, zero-extended to 64 bits; 0 when absent.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup callback, or null when no duplication function is needed.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5186
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by the atomic-reduction path; null otherwise).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array: skip the body entirely when empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer; the back-edge incoming value is added
  // after the loop body is emitted (see below).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // PHI over the destination element pointer.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generator's expressions operate on a single element.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Wire up the loop back-edges for both PHIs from whatever block the body
  // generator left us in (it may have created new blocks).
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5266
5267 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5268 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5269 /// UDR combiner function.
emitReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp)5270 static void emitReductionCombiner(CodeGenFunction &CGF,
5271 const Expr *ReductionOp) {
5272 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5273 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5274 if (const auto *DRE =
5275 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5276 if (const auto *DRD =
5277 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5278 std::pair<llvm::Function *, llvm::Function *> Reduction =
5279 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5280 RValue Func = RValue::get(Reduction.first);
5281 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5282 CGF.EmitIgnoredExpr(ReductionOp);
5283 return;
5284 }
5285 CGF.EmitIgnoredExpr(ReductionOp);
5286 }
5287
/// Emits the outlined reduce_func(void *LHSArg, void *RHSArg) passed to
/// __kmpc_reduce{_nowait}: both arguments are arrays of void* pointing at the
/// reduction items; for each item the corresponding combiner from
/// \p ReductionOps is applied with LHS/RHS remapped into the arrays.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap the LHS/RHS variables of each reduction to the corresponding slots
  // of the incoming arrays. Idx tracks the array slot and can run ahead of I
  // because VLA items occupy an extra slot holding the array size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The next slot carries the element count as an int disguised as void*;
      // bind it to the VLA size expression so EmitVariablyModifiedType works.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; array-typed items go element-by-element.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5379
emitSingleReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp,const Expr * PrivateRef,const DeclRefExpr * LHS,const DeclRefExpr * RHS)5380 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5381 const Expr *ReductionOp,
5382 const Expr *PrivateRef,
5383 const DeclRefExpr *LHS,
5384 const DeclRefExpr *RHS) {
5385 if (PrivateRef->getType()->isArrayType()) {
5386 // Emit reduction for array section.
5387 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5388 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5389 EmitOMPAggregateReduction(
5390 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5391 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5392 emitReductionCombiner(CGF, ReductionOp);
5393 });
5394 } else {
5395 // Emit reduction for array subscript or single variable.
5396 emitReductionCombiner(CGF, ReductionOp);
5397 }
5398 }
5399
/// Emits code for the OpenMP 'reduction' clause: either a simple in-place
/// combination (SimpleReduction), or the full __kmpc_reduce{_nowait} protocol
/// with a tree-reduce function, a case-1 (locked) path and a case-2 (atomic)
/// path, as sketched in the comment block below.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: combine each private into the original
    // directly and return.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // VLA items take two slots: one for the pointer, one for the element count.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The count is smuggled through the void* slot via inttoptr; the
      // reduce_func decodes it with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // CommonActionTy brackets the combiner with the matching end-reduce call.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Recognize the 'x = <update>' shape so it can be lowered atomically.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value in a temp that
                // shadows VD so UpExpr can be re-evaluated against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5706
5707 /// Generates unique name for artificial threadprivate variables.
5708 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
generateUniqueName(CodeGenModule & CGM,StringRef Prefix,const Expr * Ref)5709 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5710 const Expr *Ref) {
5711 SmallString<256> Buffer;
5712 llvm::raw_svector_ostream Out(Buffer);
5713 const clang::DeclRefExpr *DE;
5714 const VarDecl *D = ::getBaseDecl(Ref, DE);
5715 if (!D)
5716 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5717 D = D->getCanonicalDecl();
5718 std::string Name = CGM.getOpenMPRuntime().getName(
5719 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5720 Out << Prefix << Name << "_"
5721 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5722 return std::string(Out.str());
5723 }
5724
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Per-item reduction codegen helper; \p N selects the item.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void *restrict' — the private copy and the original.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Dereference %arg to get the address of the private reduction item.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Original item is not needed: hand the initializer a null lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5793
5794 /// Emits reduction combiner function:
5795 /// \code
5796 /// void @.red_comb(void* %arg0, void* %arg1) {
5797 /// %lhs = bitcast void* %arg0 to <type>*
5798 /// %rhs = bitcast void* %arg1 to <type>*
5799 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5800 /// store <type> %2, <type>* %lhs
5801 /// ret void
5802 /// }
5803 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS and RHS are DeclRefExprs to the compiler-generated placeholder
  // variables of the combiner expression; they are remapped below onto the
  // runtime-supplied arguments.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // The combiner has the prototype void(void *inout, void *in).
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5871
5872 /// Emits reduction finalizer function:
5873 /// \code
5874 /// void @.red_fini(void* %arg) {
5875 /// %0 = bitcast void* %arg to <type>*
5876 /// <destroy>(<type>* %0)
5877 /// ret void
5878 /// }
5879 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is required when the reduction item needs no cleanups; the
  // caller stores a null pointer in the reduce_fini field in that case.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // The finalizer has the prototype void(void *priv).
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>*
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5920
/// Emits the array of kmp_taskred_input_t descriptors for the task reduction
/// items in \p Data and registers them with the runtime via
/// __kmpc_taskred_modifier_init (reductions with a task modifier) or
/// __kmpc_taskred_init. Returns the taskgroup handle returned by the runtime,
/// or nullptr if there is nothing to do.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null if no cleanups are needed).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (or 1 to request delayed creation for items with
    // non-constant size).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6049
/// Emits the runtime call that finalizes a task reduction with a task
/// modifier. \p IsWorksharingReduction selects the is_ws flag value.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  // The result (if any) is intentionally discarded.
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6067
/// Stores the (runtime-computed) size of reduction item \p N into an
/// artificial threadprivate variable when the size is not a compile-time
/// constant, so that the generated init/comb/fini functions can load it back.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    // The unique name must match the one used when the init/comb/fini
    // functions load the size back.
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6084
/// Returns the address of the thread-specific copy of the reduction item that
/// corresponds to \p SharedLVal, as provided by the runtime. The alignment of
/// the shared copy is reused for the returned address.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}
6104
/// Emits code for the 'taskwait' directive, either through the OpenMPIRBuilder
/// (when enabled) or as a direct __kmpc_omp_taskwait runtime call.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  // Inside an untied-task region, taskwait is a scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6125
/// Emits the body of an OpenMP directive inline in the current function.
/// The captured-statement info is temporarily swapped for an inlined-region
/// one by the RAII object and restored on scope exit.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6135
namespace {
/// Cancellation kinds passed to the __kmpc_cancel /
/// __kmpc_cancellationpoint runtime entry points. The numeric values are
/// handed directly to the runtime, so they must match its expectations.
enum RTCancelKind {
  CancelNoreq = 0,    // No cancellation requested.
  CancelParallel = 1, // Cancel a 'parallel' region.
  CancelLoop = 2,     // Cancel a worksharing-loop ('for') region.
  CancelSections = 3, // Cancel a 'sections' region.
  CancelTaskgroup = 4 // Cancel a 'taskgroup' region.
};
} // anonymous namespace
6145
getCancellationKind(OpenMPDirectiveKind CancelRegion)6146 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6147 RTCancelKind CancelKind = CancelNoreq;
6148 if (CancelRegion == OMPD_parallel)
6149 CancelKind = CancelParallel;
6150 else if (CancelRegion == OMPD_for)
6151 CancelKind = CancelLoop;
6152 else if (CancelRegion == OMPD_sections)
6153 CancelKind = CancelSections;
6154 else {
6155 assert(CancelRegion == OMPD_taskgroup);
6156 CancelKind = CancelTaskgroup;
6157 }
6158 return CancelKind;
6159 }
6160
/// Emits code for a 'cancellation point' directive: queries the runtime and,
/// if cancellation was requested, branches out of the enclosing construct
/// through any pending cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch to the region's cancellation destination,
      // running any pending cleanups on the way.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6197
/// Emits code for the 'cancel' directive, optionally guarded by an 'if'
/// clause condition: calls __kmpc_cancel and, if cancellation is activated,
/// branches out of the enclosing construct through any pending cleanups.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted for the 'then' branch of the 'if' clause (or
    // unconditionally when there is no 'if' clause).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch to the region's cancellation destination,
      // running any pending cleanups on the way.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6240
namespace {
/// Cleanup action for uses_allocators support: initializes the listed
/// allocators on entry to the region and destroys them on exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// Pairs of (allocator expression, allocator-traits expression).
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      // first = allocator expression, second = allocator traits expression.
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6268
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)6269 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6270 const OMPExecutableDirective &D, StringRef ParentName,
6271 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6272 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6273 assert(!ParentName.empty() && "Invalid target region parent name!");
6274 HasEmittedTargetRegion = true;
6275 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6276 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6277 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6278 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6279 if (!D.AllocatorTraits)
6280 continue;
6281 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6282 }
6283 }
6284 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6285 CodeGen.setAction(UsesAllocatorAction);
6286 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6287 IsOffloadEntry, CodeGen);
6288 }
6289
/// Emits initialization of a single uses_allocators allocator: calls
/// __kmpc_init_allocator with the allocator's traits and stores the returned
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the extent of the constant array of traits.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as a void** and load the traits
  // pointer to pass to the runtime.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the declaration of the allocator variable before storing into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* result to the allocator variable's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6324
/// Emits destruction of a single uses_allocators allocator: loads the handle
/// from the allocator variable and calls __kmpc_destroy_allocator with it.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // Convert the allocator handle back to void* for the runtime call.
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
6340
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region body into a function with the chosen
  // entry name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host, a uniquely named constant global serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6407
6408 /// Checks if the expression is constant or does not have non-trivial function
6409 /// calls.
isTrivial(ASTContext & Ctx,const Expr * E)6410 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6411 // We can skip constant expressions.
6412 // We can skip expressions with trivial calls or simple expressions.
6413 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6414 !E->hasNonTrivialCall(Ctx)) &&
6415 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6416 }
6417
/// Returns the single significant child statement of \p Body, descending
/// through nested compound statements, or nullptr if there is more than one.
/// Trivial expressions, ignorable directives, and harmless declarations do
/// not count as children.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (no side effects, no non-trivial calls) are
      // ignorable.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Non-variable declarations of these kinds never generate code.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or of trivial (or
              // reference) type with no or only a trivial initializer.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6462
6463 /// Emit the number of teams for a target directive. Inspect the num_teams
6464 /// clause associated with a teams construct combined or closely nested
6465 /// with the target directive.
6466 ///
6467 /// Emit a team of size one for directives such as 'target parallel' that
6468 /// have no associated teams construct.
6469 ///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' region: look for a single closely nested directive in
    // the captured body to determine the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the num_teams expression inside the captured-statement
          // context so that captured variables resolve correctly.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: 0 lets the runtime pick a default.
        return Bld.getInt32(0);
      }
      // A directly nested parallel/simd region implies a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No nested directive: the number of teams cannot be computed here.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: read num_teams, if present, directly
    // from the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Directives with no associated teams construct run a single team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  // Non-target directives are rejected by the assert above; reaching here
  // means an unhandled target directive kind was added.
  llvm_unreachable("Unexpected directive kind.");
}
6595
/// Compute the number of threads implied by a 'parallel' region nested
/// directly inside the captured statement \p CS, if any.
///
/// Honors the nested directive's 'if' and 'num_threads' clauses and clamps
/// the result by \p DefaultThreadLimitVal (when non-null). Returns:
///   - the computed i32 thread count for a nested parallel region,
///   - 1 for a nested simd region,
///   - \p DefaultThreadLimitVal (possibly null) for other nested directives,
///   - \p DefaultThreadLimitVal or 0 when there is no nested directive.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to the parallel region (either
        // unmodified or with the 'parallel' name modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition serializes the
            // region, so exactly one thread runs.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Condition is dynamic: emit its pre-init declarations first,
            // then evaluate it as a bool for the select below.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // No-init captures: allocate storage and register cleanups
                  // without emitting an initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp: min(DefaultThreadLimitVal, NumThreads), unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6687
/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': walk the nested directives looking for thread_limit on
    // a nested teams construct and num_threads on a nested parallel region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit thread_limit in the captured-statement context, including its
        // pre-init declarations.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested non-distribute teams directive, descend one more level
      // to find the directive nested inside the teams region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A nested distribute (non-simd) region may carry a nested parallel
      // region whose num_threads decides the count.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for the parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false 'if' serializes the parallel region.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(num_threads, thread_limit) using unsigned compare.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only combined directives always execute with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6912
6913 namespace {
6914 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6915
6916 // Utility to handle information from clauses associated with a given
6917 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6918 // It provides a convenient interface to obtain the information and generate
6919 // code for that information.
6920 class MappableExprsHandler {
6921 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by
  /// the offloading runtime (libomptarget).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
6962
6963 /// Get the offset of the OMP_MAP_MEMBER_OF field.
getFlagMemberOffset()6964 static unsigned getFlagMemberOffset() {
6965 unsigned Offset = 0;
6966 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
6967 Remain = Remain >> 1)
6968 Offset++;
6969 return Offset;
6970 }
6971
6972 /// Class that associates information with a base pointer to be passed to the
6973 /// runtime library.
6974 class BasePointerInfo {
6975 /// The base pointer.
6976 llvm::Value *Ptr = nullptr;
6977 /// The base declaration that refers to this device pointer, or null if
6978 /// there is none.
6979 const ValueDecl *DevPtrDecl = nullptr;
6980
6981 public:
BasePointerInfo(llvm::Value * Ptr,const ValueDecl * DevPtrDecl=nullptr)6982 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6983 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
operator *() const6984 llvm::Value *operator*() const { return Ptr; }
getDevicePtrDecl() const6985 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
setDevicePtrDecl(const ValueDecl * D)6986 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6987 };
6988
  // Convenience aliases for the parallel arrays of base pointers, section
  // pointers/sizes, and map-type flags handed to the offloading runtime.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6992
  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Lowest mapped field: (field index, address). Defaults to an invalid
    // address until the first element is recorded.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped field: (field index, address).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the whole struct.
    Address Base = Address::invalid();
  };
7004
7005 private:
  /// Information extracted from a single map-like clause for one mappable
  /// expression: its component list, the map type and modifiers, and whether
  /// the device pointer has to be returned (use_device_ptr/use_device_addr).
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True for maps generated implicitly rather than written by the user.
    bool IsImplicit = false;
    // True when the entry comes from a use_device_addr clause.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };
7025
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Innermost expression of the component list for the deferred entry.
    const Expr *IE = nullptr;
    // Declaration the use_device_ptr/use_device_addr clause refers to.
    const ValueDecl *VD = nullptr;
    // True when the entry originates from use_device_addr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7038
  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7059
  /// Return the size in bytes of the object designated by \p E, suitable for
  /// the runtime's size argument. Handles array shaping expressions and array
  /// sections specially; otherwise falls back to the size of E's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // size = sizeof(element) * dim0 * dim1 * ...
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb beyond the end: clamp the computed size to zero.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7134
7135 /// Return the corresponding bits for a given map clause modifier. Add
7136 /// a flag marking the map as a pointer if requested. Add a flag marking the
7137 /// map as the first one of a series of maps that relate to the same map
7138 /// expression.
getMapTypeBits(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,bool IsImplicit,bool AddPtrFlag,bool AddIsTargetParamFlag) const7139 OpenMPOffloadMappingFlags getMapTypeBits(
7140 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7141 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7142 OpenMPOffloadMappingFlags Bits =
7143 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7144 switch (MapType) {
7145 case OMPC_MAP_alloc:
7146 case OMPC_MAP_release:
7147 // alloc and release is the default behavior in the runtime library, i.e.
7148 // if we don't pass any bits alloc/release that is what the runtime is
7149 // going to do. Therefore, we don't need to signal anything for these two
7150 // type modifiers.
7151 break;
7152 case OMPC_MAP_to:
7153 Bits |= OMP_MAP_TO;
7154 break;
7155 case OMPC_MAP_from:
7156 Bits |= OMP_MAP_FROM;
7157 break;
7158 case OMPC_MAP_tofrom:
7159 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7160 break;
7161 case OMPC_MAP_delete:
7162 Bits |= OMP_MAP_DELETE;
7163 break;
7164 case OMPC_MAP_unknown:
7165 llvm_unreachable("Unexpected map type!");
7166 }
7167 if (AddPtrFlag)
7168 Bits |= OMP_MAP_PTR_AND_OBJ;
7169 if (AddIsTargetParamFlag)
7170 Bits |= OMP_MAP_TARGET_PARAM;
7171 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7172 != MapModifiers.end())
7173 Bits |= OMP_MAP_ALWAYS;
7174 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7175 != MapModifiers.end())
7176 Bits |= OMP_MAP_CLOSE;
7177 return Bits;
7178 }
7179
7180 /// Return true if the provided expression is a final array section. A
7181 /// final array section, is one whose length can't be proved to be one.
isFinalArraySectionExpression(const Expr * E) const7182 bool isFinalArraySectionExpression(const Expr *E) const {
7183 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7184
7185 // It is not an array section and therefore not a unity-size one.
7186 if (!OASE)
7187 return false;
7188
7189 // An array section with no colon always refer to a single element.
7190 if (OASE->getColonLocFirst().isInvalid())
7191 return false;
7192
7193 const Expr *Length = OASE->getLength();
7194
7195 // If we don't have a length we have to check if the array has size 1
7196 // for this dimension. Also, we should always expect a length if the
7197 // base type is pointer.
7198 if (!Length) {
7199 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7200 OASE->getBase()->IgnoreParenImpCasts())
7201 .getCanonicalType();
7202 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7203 return ATy->getSize().getSExtValue() != 1;
7204 // If we don't have a constant dimension length, we have to consider
7205 // the current section as having any size, so it is not necessarily
7206 // unitary. If it happen to be unity size, that's user fault.
7207 return true;
7208 }
7209
7210 // Check if the length evaluates to 1.
7211 Expr::EvalResult Result;
7212 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7213 return true; // Can have more that size 1.
7214
7215 llvm::APSInt ConstLength = Result.Val.getInt();
7216 return ConstLength.getSExtValue() != 1;
7217 }
7218
7219 /// Generate the base pointers, section pointers, sizes and map type
7220 /// bits for the provided map type, map modifier, and expression components.
7221 /// \a IsFirstComponent should be set to true if the provided set of
7222 /// components is the first associated with a capture.
generateInfoForComponentList(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,OMPClauseMappableExprCommon::MappableExprComponentListRef Components,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,StructRangeInfoTy & PartialStruct,bool IsFirstComponentList,bool IsImplicit,bool ForDeviceAddr=false,ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedElements=llvm::None) const7223 void generateInfoForComponentList(
7224 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7226 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7227 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7228 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7229 bool IsImplicit, bool ForDeviceAddr = false,
7230 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7231 OverlappedElements = llvm::None) const {
7232 // The following summarizes what has to be generated for each map and the
7233 // types below. The generated information is expressed in this order:
7234 // base pointer, section pointer, size, flags
7235 // (to add to the ones that come from the map type and modifier).
7236 //
7237 // double d;
7238 // int i[100];
7239 // float *p;
7240 //
7241 // struct S1 {
7242 // int i;
7243 // float f[50];
7244 // }
7245 // struct S2 {
7246 // int i;
7247 // float f[50];
7248 // S1 s;
7249 // double *p;
7250 // struct S2 *ps;
7251 // }
7252 // S2 s;
7253 // S2 *ps;
7254 //
7255 // map(d)
7256 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7257 //
7258 // map(i)
7259 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7260 //
7261 // map(i[1:23])
7262 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7263 //
7264 // map(p)
7265 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7266 //
7267 // map(p[1:24])
7268 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7269 // in unified shared memory mode or for local pointers
7270 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7271 //
7272 // map(s)
7273 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7274 //
7275 // map(s.i)
7276 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7277 //
7278 // map(s.s.f)
7279 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7280 //
7281 // map(s.p)
7282 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7283 //
7284 // map(to: s.p[:22])
7285 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7286 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7287 // &(s.p), &(s.p[0]), 22*sizeof(double),
7288 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7289 // (*) alloc space for struct members, only this is a target parameter
7290 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7291 // optimizes this entry out, same in the examples below)
7292 // (***) map the pointee (map: to)
7293 //
7294 // map(s.ps)
7295 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7296 //
7297 // map(from: s.ps->s.i)
7298 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7299 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7300 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7301 //
7302 // map(to: s.ps->ps)
7303 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7304 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7305 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7306 //
7307 // map(s.ps->ps->ps)
7308 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7309 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7310 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7311 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7312 //
7313 // map(to: s.ps->ps->s.f[:22])
7314 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7315 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7316 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7317 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7318 //
7319 // map(ps)
7320 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7321 //
7322 // map(ps->i)
7323 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7324 //
7325 // map(ps->s.f)
7326 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7327 //
7328 // map(from: ps->p)
7329 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7330 //
7331 // map(to: ps->p[:22])
7332 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7333 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7334 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7335 //
7336 // map(ps->ps)
7337 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7338 //
7339 // map(from: ps->ps->s.i)
7340 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7341 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7342 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7343 //
7344 // map(from: ps->ps->ps)
7345 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7346 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7347 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7348 //
7349 // map(ps->ps->ps->ps)
7350 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7351 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7352 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7353 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7354 //
7355 // map(to: ps->ps->ps->s.f[:22])
7356 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7357 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7358 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7359 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7360 //
7361 // map(to: s.f[:22]) map(from: s.p[:33])
7362 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7363 //     sizeof(double*) (*), TARGET_PARAM
7364 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7365 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7366 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7367 // (*) allocate contiguous space needed to fit all mapped members even if
7368 // we allocate space for members not mapped (in this example,
7369 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7370 // them as well because they fall between &s.f[0] and &s.p)
7371 //
7372 // map(from: s.f[:22]) map(to: ps->p[:33])
7373 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7374 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7375 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7376 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7377 // (*) the struct this entry pertains to is the 2nd element in the list of
7378 // arguments, hence MEMBER_OF(2)
7379 //
7380 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7381 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7382 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7383 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7384 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7385 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7386 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7387 // (*) the struct this entry pertains to is the 4th element in the list
7388 // of arguments, hence MEMBER_OF(4)
7389
7390 // Track if the map information being generated is the first for a capture.
7391 bool IsCaptureFirstInfo = IsFirstComponentList;
7392 // When the variable is on a declare target link or in a to clause with
7393 // unified memory, a reference is needed to hold the host/device address
7394 // of the variable.
7395 bool RequiresReference = false;
7396
7397 // Scan the components from the base to the complete expression.
7398 auto CI = Components.rbegin();
7399 auto CE = Components.rend();
7400 auto I = CI;
7401
7402 // Track if the map information being generated is the first for a list of
7403 // components.
7404 bool IsExpressionFirstInfo = true;
7405 bool FirstPointerInComplexData = false;
7406 Address BP = Address::invalid();
7407 const Expr *AssocExpr = I->getAssociatedExpression();
7408 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7409 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7410 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7411
7412 if (isa<MemberExpr>(AssocExpr)) {
7413 // The base is the 'this' pointer. The content of the pointer is going
7414 // to be the base of the field being mapped.
7415 BP = CGF.LoadCXXThisAddress();
7416 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7417 (OASE &&
7418 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7419 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7420 } else if (OAShE &&
7421 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7422 BP = Address(
7423 CGF.EmitScalarExpr(OAShE->getBase()),
7424 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7425 } else {
7426 // The base is the reference to the variable.
7427 // BP = &Var.
7428 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7429 if (const auto *VD =
7430 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7431 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7432 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7433 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7434 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7435 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7436 RequiresReference = true;
7437 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7438 }
7439 }
7440 }
7441
7442 // If the variable is a pointer and is being dereferenced (i.e. is not
7443 // the last component), the base has to be the pointer itself, not its
7444 // reference. References are ignored for mapping purposes.
7445 QualType Ty =
7446 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7447 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7448 // No need to generate individual map information for the pointer, it
7449 // can be associated with the combined storage if shared memory mode is
7450 // active or the base declaration is not global variable.
7451 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7452 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7453 !VD || VD->hasLocalStorage())
7454 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7455 else
7456 FirstPointerInComplexData = IsCaptureFirstInfo;
7457 ++I;
7458 }
7459 }
7460
7461 // Track whether a component of the list should be marked as MEMBER_OF some
7462 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7463 // in a component list should be marked as MEMBER_OF, all subsequent entries
7464 // do not belong to the base struct. E.g.
7465 // struct S2 s;
7466 // s.ps->ps->ps->f[:]
7467 // (1) (2) (3) (4)
7468 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7469 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7470 // is the pointee of ps(2) which is not member of struct s, so it should not
7471 // be marked as such (it is still PTR_AND_OBJ).
7472 // The variable is initialized to false so that PTR_AND_OBJ entries which
7473 // are not struct members are not considered (e.g. array of pointers to
7474 // data).
7475 bool ShouldBeMemberOf = false;
7476
7477 // Variable keeping track of whether or not we have encountered a component
7478 // in the component list which is a member expression. Useful when we have a
7479 // pointer or a final array section, in which case it is the previous
7480 // component in the list which tells us whether we have a member expression.
7481 // E.g. X.f[:]
7482 // While processing the final array section "[:]" it is "f" which tells us
7483 // whether we are dealing with a member of a declared struct.
7484 const MemberExpr *EncounteredME = nullptr;
7485
7486 for (; I != CE; ++I) {
7487 // If the current component is member of a struct (parent struct) mark it.
7488 if (!EncounteredME) {
7489 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7490 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7491 // as MEMBER_OF the parent struct.
7492 if (EncounteredME) {
7493 ShouldBeMemberOf = true;
7494 // Do not emit as complex pointer if this is actually not array-like
7495 // expression.
7496 if (FirstPointerInComplexData) {
7497 QualType Ty = std::prev(I)
7498 ->getAssociatedDeclaration()
7499 ->getType()
7500 .getNonReferenceType();
7501 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7502 FirstPointerInComplexData = false;
7503 }
7504 }
7505 }
7506
7507 auto Next = std::next(I);
7508
7509 // We need to generate the addresses and sizes if this is the last
7510 // component, if the component is a pointer or if it is an array section
7511 // whose length can't be proved to be one. If this is a pointer, it
7512 // becomes the base address for the following components.
7513
7514 // A final array section, is one whose length can't be proved to be one.
7515 bool IsFinalArraySection =
7516 isFinalArraySectionExpression(I->getAssociatedExpression());
7517
7518 // Get information on whether the element is a pointer. Have to do a
7519 // special treatment for array sections given that they are built-in
7520 // types.
7521 const auto *OASE =
7522 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7523 const auto *OAShE =
7524 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7525 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7526 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7527 bool IsPointer =
7528 OAShE ||
7529 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7530 .getCanonicalType()
7531 ->isAnyPointerType()) ||
7532 I->getAssociatedExpression()->getType()->isAnyPointerType();
7533 bool IsNonDerefPointer = IsPointer && !UO && !BO;
7534
7535 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7536 // If this is not the last component, we expect the pointer to be
7537 // associated with an array expression or member expression.
7538 assert((Next == CE ||
7539 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7540 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7541 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7542 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7543 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7544 "Unexpected expression");
7545
7546 Address LB = Address::invalid();
7547 if (OAShE) {
7548 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7549 CGF.getContext().getTypeAlignInChars(
7550 OAShE->getBase()->getType()));
7551 } else {
7552 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7553 .getAddress(CGF);
7554 }
7555
7556 // If this component is a pointer inside the base struct then we don't
7557 // need to create any entry for it - it will be combined with the object
7558 // it is pointing to into a single PTR_AND_OBJ entry.
7559 bool IsMemberPointerOrAddr =
7560 (IsPointer || ForDeviceAddr) && EncounteredME &&
7561 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7562 EncounteredME);
7563 if (!OverlappedElements.empty()) {
7564 // Handle base element with the info for overlapped elements.
7565 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7566 assert(Next == CE &&
7567 "Expected last element for the overlapped elements.");
7568 assert(!IsPointer &&
7569 "Unexpected base element with the pointer type.");
7570 // Mark the whole struct as the struct that requires allocation on the
7571 // device.
7572 PartialStruct.LowestElem = {0, LB};
7573 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7574 I->getAssociatedExpression()->getType());
7575 Address HB = CGF.Builder.CreateConstGEP(
7576 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7577 CGF.VoidPtrTy),
7578 TypeSize.getQuantity() - 1);
7579 PartialStruct.HighestElem = {
7580 std::numeric_limits<decltype(
7581 PartialStruct.HighestElem.first)>::max(),
7582 HB};
7583 PartialStruct.Base = BP;
7584 // Emit data for non-overlapped data.
7585 OpenMPOffloadMappingFlags Flags =
7586 OMP_MAP_MEMBER_OF |
7587 getMapTypeBits(MapType, MapModifiers, IsImplicit,
7588 /*AddPtrFlag=*/false,
7589 /*AddIsTargetParamFlag=*/false);
7590 LB = BP;
7591 llvm::Value *Size = nullptr;
7592 // Do bitcopy of all non-overlapped structure elements.
7593 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7594 Component : OverlappedElements) {
7595 Address ComponentLB = Address::invalid();
7596 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7597 Component) {
7598 if (MC.getAssociatedDeclaration()) {
7599 ComponentLB =
7600 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7601 .getAddress(CGF);
7602 Size = CGF.Builder.CreatePtrDiff(
7603 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7604 CGF.EmitCastToVoidPtr(LB.getPointer()));
7605 break;
7606 }
7607 }
7608 BasePointers.push_back(BP.getPointer());
7609 Pointers.push_back(LB.getPointer());
7610 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7611 /*isSigned=*/true));
7612 Types.push_back(Flags);
7613 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7614 }
7615 BasePointers.push_back(BP.getPointer());
7616 Pointers.push_back(LB.getPointer());
7617 Size = CGF.Builder.CreatePtrDiff(
7618 CGF.EmitCastToVoidPtr(
7619 CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7620 CGF.EmitCastToVoidPtr(LB.getPointer()));
7621 Sizes.push_back(
7622 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7623 Types.push_back(Flags);
7624 break;
7625 }
7626 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7627 if (!IsMemberPointerOrAddr) {
7628 BasePointers.push_back(BP.getPointer());
7629 Pointers.push_back(LB.getPointer());
7630 Sizes.push_back(
7631 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7632
7633 // We need to add a pointer flag for each map that comes from the
7634 // same expression except for the first one. We also need to signal
7635 // this map is the first one that relates with the current capture
7636 // (there is a set of entries for each capture).
7637 OpenMPOffloadMappingFlags Flags =
7638 getMapTypeBits(MapType, MapModifiers, IsImplicit,
7639 !IsExpressionFirstInfo || RequiresReference ||
7640 FirstPointerInComplexData,
7641 IsCaptureFirstInfo && !RequiresReference);
7642
7643 if (!IsExpressionFirstInfo) {
7644 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7645 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7646 if (IsPointer)
7647 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7648 OMP_MAP_DELETE | OMP_MAP_CLOSE);
7649
7650 if (ShouldBeMemberOf) {
7651 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7652 // should be later updated with the correct value of MEMBER_OF.
7653 Flags |= OMP_MAP_MEMBER_OF;
7654 // From now on, all subsequent PTR_AND_OBJ entries should not be
7655 // marked as MEMBER_OF.
7656 ShouldBeMemberOf = false;
7657 }
7658 }
7659
7660 Types.push_back(Flags);
7661 }
7662
7663 // If we have encountered a member expression so far, keep track of the
7664 // mapped member. If the parent is "*this", then the value declaration
7665 // is nullptr.
7666 if (EncounteredME) {
7667 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7668 unsigned FieldIndex = FD->getFieldIndex();
7669
7670 // Update info about the lowest and highest elements for this struct
7671 if (!PartialStruct.Base.isValid()) {
7672 PartialStruct.LowestElem = {FieldIndex, LB};
7673 if (IsFinalArraySection) {
7674 Address HB =
7675 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7676 .getAddress(CGF);
7677 PartialStruct.HighestElem = {FieldIndex, HB};
7678 } else {
7679 PartialStruct.HighestElem = {FieldIndex, LB};
7680 }
7681 PartialStruct.Base = BP;
7682 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7683 PartialStruct.LowestElem = {FieldIndex, LB};
7684 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7685 PartialStruct.HighestElem = {FieldIndex, LB};
7686 }
7687 }
7688
7689 // If we have a final array section, we are done with this expression.
7690 if (IsFinalArraySection)
7691 break;
7692
7693 // The pointer becomes the base for the next element.
7694 if (Next != CE)
7695 BP = LB;
7696
7697 IsExpressionFirstInfo = false;
7698 IsCaptureFirstInfo = false;
7699 FirstPointerInComplexData = false;
7700 }
7701 }
7702 }
7703
7704 /// Return the adjusted map modifiers if the declaration a capture refers to
7705 /// appears in a first-private clause. This is expected to be used only with
7706 /// directives that start with 'target'.
7707 MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture & Cap) const7708 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7709 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7710
7711 // A first private variable captured by reference will use only the
7712 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7713 // declaration is known as first-private in this handler.
7714 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7715 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7716 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7717 return MappableExprsHandler::OMP_MAP_ALWAYS |
7718 MappableExprsHandler::OMP_MAP_TO;
7719 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7720 return MappableExprsHandler::OMP_MAP_TO |
7721 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7722 return MappableExprsHandler::OMP_MAP_PRIVATE |
7723 MappableExprsHandler::OMP_MAP_TO;
7724 }
7725 return MappableExprsHandler::OMP_MAP_TO |
7726 MappableExprsHandler::OMP_MAP_FROM;
7727 }
7728
getMemberOfFlag(unsigned Position)7729 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7730 // Rotate by getFlagMemberOffset() bits.
7731 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7732 << getFlagMemberOffset());
7733 }
7734
setCorrectMemberOfFlag(OpenMPOffloadMappingFlags & Flags,OpenMPOffloadMappingFlags MemberOfFlag)7735 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7736 OpenMPOffloadMappingFlags MemberOfFlag) {
7737 // If the entry is PTR_AND_OBJ but has not been marked with the special
7738 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7739 // marked as MEMBER_OF.
7740 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7741 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7742 return;
7743
7744 // Reset the placeholder value to prepare the flag for the assignment of the
7745 // proper MEMBER_OF value.
7746 Flags &= ~OMP_MAP_MEMBER_OF;
7747 Flags |= MemberOfFlag;
7748 }
7749
  /// Flatten the layout of \p RD into \p Layout: append every non-empty,
  /// non-bitfield, non-zero-size field of \p RD and of all its (virtual and
  /// non-virtual) bases, in LLVM struct-layout order. \p AsBase selects the
  /// base-subobject LLVM type, used when \p RD is being visited as a base
  /// class of another record.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per LLVM struct element. Slots that stay null (e.g. padding or
    // bitfield storage units) are skipped in the final pass below.
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the entry recorded by the non-virtual pass if the slot is
      // already occupied.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the result in struct order: recurse into base classes, append
    // fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7809
7810 public:
  /// Constructor for executable directives: records the directive and
  /// pre-extracts the per-declaration information the handler needs later
  /// (first-private declarations and is_device_ptr component lists).
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information. try_emplace keeps the first
    // occurrence of each declaration; the mapped value records whether the
    // clause was implicit.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Record the allocator traits variable when present; otherwise fall
        // back to the allocator expression itself, if it names a variable.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }
7836
  /// Constructor for the declare mapper directive. Only the directive and the
  /// CodeGenFunction are recorded; no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7840
7841 /// Generate code for the combined entry if we have a partially mapped struct
7842 /// and take care of the mapping flags of the arguments corresponding to
7843 /// individual struct members.
emitCombinedEntry(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,MapFlagsArrayTy & CurTypes,const StructRangeInfoTy & PartialStruct) const7844 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7845 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7846 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7847 const StructRangeInfoTy &PartialStruct) const {
7848 // Base is the base of the struct
7849 BasePointers.push_back(PartialStruct.Base.getPointer());
7850 // Pointer is the address of the lowest element
7851 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7852 Pointers.push_back(LB);
7853 // Size is (addr of {highest+1} element) - (addr of lowest element)
7854 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7855 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7856 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7857 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7858 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7859 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7860 /*isSigned=*/false);
7861 Sizes.push_back(Size);
7862 // Map type is always TARGET_PARAM
7863 Types.push_back(OMP_MAP_TARGET_PARAM);
7864 // Remove TARGET_PARAM flag from the first element
7865 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7866
7867 // All other current entries will be MEMBER_OF the combined entry
7868 // (except for PTR_AND_OBJ entries which do not have a placeholder value
7869 // 0xFFFF in the MEMBER_OF field).
7870 OpenMPOffloadMappingFlags MemberOfFlag =
7871 getMemberOfFlag(BasePointers.size() - 1);
7872 for (auto &M : CurTypes)
7873 setCorrectMemberOfFlag(M, MemberOfFlag);
7874 }
7875
7876 /// Generate all the base pointers, section pointers, sizes and map
7877 /// types for the extracted mappable expressions. Also, for each item that
7878 /// relates with a device pointer, a pair of the relevant declaration and
7879 /// index where it occurs is appended to the device pointers info array.
generateAllInfo(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const7880 void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7881 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7882 MapFlagsArrayTy &Types) const {
7883 // We have to process the component lists that relate with the same
7884 // declaration in a single chunk so that we can generate the map flags
7885 // correctly. Therefore, we organize all lists in a map.
7886 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7887
7888 // Helper function to fill the information map for the different supported
7889 // clauses.
7890 auto &&InfoGen =
7891 [&Info](const ValueDecl *D,
7892 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7893 OpenMPMapClauseKind MapType,
7894 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7895 bool ReturnDevicePointer, bool IsImplicit,
7896 bool ForDeviceAddr = false) {
7897 const ValueDecl *VD =
7898 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7899 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7900 IsImplicit, ForDeviceAddr);
7901 };
7902
7903 assert(CurDir.is<const OMPExecutableDirective *>() &&
7904 "Expect a executable directive");
7905 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7906 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7907 for (const auto L : C->component_lists()) {
7908 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7909 /*ReturnDevicePointer=*/false, C->isImplicit());
7910 }
7911 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7912 for (const auto L : C->component_lists()) {
7913 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7914 /*ReturnDevicePointer=*/false, C->isImplicit());
7915 }
7916 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7917 for (const auto L : C->component_lists()) {
7918 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7919 /*ReturnDevicePointer=*/false, C->isImplicit());
7920 }
7921
7922 // Look at the use_device_ptr clause information and mark the existing map
7923 // entries as such. If there is no map information for an entry in the
7924 // use_device_ptr list, we create one with map type 'alloc' and zero size
7925 // section. It is the user fault if that was not mapped before. If there is
7926 // no map information and the pointer is a struct member, then we defer the
7927 // emission of that entry until the whole struct has been processed.
7928 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7929 DeferredInfo;
7930 MapBaseValuesArrayTy UseDevicePtrBasePointers;
7931 MapValuesArrayTy UseDevicePtrPointers;
7932 MapValuesArrayTy UseDevicePtrSizes;
7933 MapFlagsArrayTy UseDevicePtrTypes;
7934
7935 for (const auto *C :
7936 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7937 for (const auto L : C->component_lists()) {
7938 assert(!L.second.empty() && "Not expecting empty list of components!");
7939 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7940 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7941 const Expr *IE = L.second.back().getAssociatedExpression();
7942 // If the first component is a member expression, we have to look into
7943 // 'this', which maps to null in the map of map information. Otherwise
7944 // look directly for the information.
7945 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7946
7947 // We potentially have map information for this declaration already.
7948 // Look for the first set of components that refer to it.
7949 if (It != Info.end()) {
7950 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
7951 return MI.Components.back().getAssociatedDeclaration() == VD;
7952 });
7953 // If we found a map entry, signal that the pointer has to be returned
7954 // and move on to the next declaration.
7955 // Exclude cases where the base pointer is mapped as array subscript,
7956 // array section or array shaping. The base address is passed as a
7957 // pointer to base in this case and cannot be used as a base for
7958 // use_device_ptr list item.
7959 if (CI != It->second.end()) {
7960 auto PrevCI = std::next(CI->Components.rbegin());
7961 const auto *VarD = dyn_cast<VarDecl>(VD);
7962 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7963 isa<MemberExpr>(IE) ||
7964 !VD->getType().getNonReferenceType()->isPointerType() ||
7965 PrevCI == CI->Components.rend() ||
7966 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7967 VarD->hasLocalStorage()) {
7968 CI->ReturnDevicePointer = true;
7969 continue;
7970 }
7971 }
7972 }
7973
7974 // We didn't find any match in our map information - generate a zero
7975 // size array section - if the pointer is a struct member we defer this
7976 // action until the whole struct has been processed.
7977 if (isa<MemberExpr>(IE)) {
7978 // Insert the pointer into Info to be processed by
7979 // generateInfoForComponentList. Because it is a member pointer
7980 // without a pointee, no entry will be generated for it, therefore
7981 // we need to generate one after the whole struct has been processed.
7982 // Nonetheless, generateInfoForComponentList must be called to take
7983 // the pointer into account for the calculation of the range of the
7984 // partial struct.
7985 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7986 /*ReturnDevicePointer=*/false, C->isImplicit());
7987 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
7988 } else {
7989 llvm::Value *Ptr =
7990 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7991 UseDevicePtrBasePointers.emplace_back(Ptr, VD);
7992 UseDevicePtrPointers.push_back(Ptr);
7993 UseDevicePtrSizes.push_back(
7994 llvm::Constant::getNullValue(CGF.Int64Ty));
7995 UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
7996 OMP_MAP_TARGET_PARAM);
7997 }
7998 }
7999 }
8000
8001 // Look at the use_device_addr clause information and mark the existing map
8002 // entries as such. If there is no map information for an entry in the
8003 // use_device_addr list, we create one with map type 'alloc' and zero size
8004 // section. It is the user fault if that was not mapped before. If there is
8005 // no map information and the pointer is a struct member, then we defer the
8006 // emission of that entry until the whole struct has been processed.
8007 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8008 for (const auto *C :
8009 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8010 for (const auto L : C->component_lists()) {
8011 assert(!L.second.empty() && "Not expecting empty list of components!");
8012 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8013 if (!Processed.insert(VD).second)
8014 continue;
8015 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8016 const Expr *IE = L.second.back().getAssociatedExpression();
8017 // If the first component is a member expression, we have to look into
8018 // 'this', which maps to null in the map of map information. Otherwise
8019 // look directly for the information.
8020 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8021
8022 // We potentially have map information for this declaration already.
8023 // Look for the first set of components that refer to it.
8024 if (It != Info.end()) {
8025 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8026 return MI.Components.back().getAssociatedDeclaration() == VD;
8027 });
8028 // If we found a map entry, signal that the pointer has to be returned
8029 // and move on to the next declaration.
8030 if (CI != It->second.end()) {
8031 CI->ReturnDevicePointer = true;
8032 continue;
8033 }
8034 }
8035
8036 // We didn't find any match in our map information - generate a zero
8037 // size array section - if the pointer is a struct member we defer this
8038 // action until the whole struct has been processed.
8039 if (isa<MemberExpr>(IE)) {
8040 // Insert the pointer into Info to be processed by
8041 // generateInfoForComponentList. Because it is a member pointer
8042 // without a pointee, no entry will be generated for it, therefore
8043 // we need to generate one after the whole struct has been processed.
8044 // Nonetheless, generateInfoForComponentList must be called to take
8045 // the pointer into account for the calculation of the range of the
8046 // partial struct.
8047 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8048 /*ReturnDevicePointer=*/false, C->isImplicit(),
8049 /*ForDeviceAddr=*/true);
8050 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8051 } else {
8052 llvm::Value *Ptr;
8053 if (IE->isGLValue())
8054 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8055 else
8056 Ptr = CGF.EmitScalarExpr(IE);
8057 UseDevicePtrBasePointers.emplace_back(Ptr, VD);
8058 UseDevicePtrPointers.push_back(Ptr);
8059 UseDevicePtrSizes.push_back(
8060 llvm::Constant::getNullValue(CGF.Int64Ty));
8061 UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
8062 OMP_MAP_TARGET_PARAM);
8063 }
8064 }
8065 }
8066
8067 for (const auto &M : Info) {
8068 // We need to know when we generate information for the first component
8069 // associated with a capture, because the mapping flags depend on it.
8070 bool IsFirstComponentList = true;
8071
8072 // Temporary versions of arrays
8073 MapBaseValuesArrayTy CurBasePointers;
8074 MapValuesArrayTy CurPointers;
8075 MapValuesArrayTy CurSizes;
8076 MapFlagsArrayTy CurTypes;
8077 StructRangeInfoTy PartialStruct;
8078
8079 for (const MapInfo &L : M.second) {
8080 assert(!L.Components.empty() &&
8081 "Not expecting declaration with no component lists.");
8082
8083 // Remember the current base pointer index.
8084 unsigned CurrentBasePointersIdx = CurBasePointers.size();
8085 generateInfoForComponentList(
8086 L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8087 CurPointers, CurSizes, CurTypes, PartialStruct,
8088 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8089
8090 // If this entry relates with a device pointer, set the relevant
8091 // declaration and add the 'return pointer' flag.
8092 if (L.ReturnDevicePointer) {
8093 assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8094 "Unexpected number of mapped base pointers.");
8095
8096 const ValueDecl *RelevantVD =
8097 L.Components.back().getAssociatedDeclaration();
8098 assert(RelevantVD &&
8099 "No relevant declaration related with device pointer??");
8100
8101 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8102 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8103 }
8104 IsFirstComponentList = false;
8105 }
8106
8107 // Append any pending zero-length pointers which are struct members and
8108 // used with use_device_ptr or use_device_addr.
8109 auto CI = DeferredInfo.find(M.first);
8110 if (CI != DeferredInfo.end()) {
8111 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8112 llvm::Value *BasePtr;
8113 llvm::Value *Ptr;
8114 if (L.ForDeviceAddr) {
8115 if (L.IE->isGLValue())
8116 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8117 else
8118 Ptr = this->CGF.EmitScalarExpr(L.IE);
8119 BasePtr = Ptr;
8120 // Entry is RETURN_PARAM. Also, set the placeholder value
8121 // MEMBER_OF=FFFF so that the entry is later updated with the
8122 // correct value of MEMBER_OF.
8123 CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8124 } else {
8125 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8126 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8127 L.IE->getExprLoc());
8128 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8129 // value MEMBER_OF=FFFF so that the entry is later updated with the
8130 // correct value of MEMBER_OF.
8131 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8132 OMP_MAP_MEMBER_OF);
8133 }
8134 CurBasePointers.emplace_back(BasePtr, L.VD);
8135 CurPointers.push_back(Ptr);
8136 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8137 }
8138 }
8139
8140 // If there is an entry in PartialStruct it means we have a struct with
8141 // individual members mapped. Emit an extra combined entry.
8142 if (PartialStruct.Base.isValid())
8143 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8144 PartialStruct);
8145
8146 // We need to append the results of this capture to what we already have.
8147 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8148 Pointers.append(CurPointers.begin(), CurPointers.end());
8149 Sizes.append(CurSizes.begin(), CurSizes.end());
8150 Types.append(CurTypes.begin(), CurTypes.end());
8151 }
8152 // Append data for use_device_ptr clauses.
8153 BasePointers.append(UseDevicePtrBasePointers.begin(),
8154 UseDevicePtrBasePointers.end());
8155 Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end());
8156 Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end());
8157 Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end());
8158 }
8159
8160 /// Generate all the base pointers, section pointers, sizes and map types for
8161 /// the extracted map clauses of user-defined mapper.
generateAllInfoForMapper(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const8162 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8163 MapValuesArrayTy &Pointers,
8164 MapValuesArrayTy &Sizes,
8165 MapFlagsArrayTy &Types) const {
8166 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8167 "Expect a declare mapper directive");
8168 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8169 // We have to process the component lists that relate with the same
8170 // declaration in a single chunk so that we can generate the map flags
8171 // correctly. Therefore, we organize all lists in a map.
8172 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8173
8174 // Helper function to fill the information map for the different supported
8175 // clauses.
8176 auto &&InfoGen = [&Info](
8177 const ValueDecl *D,
8178 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8179 OpenMPMapClauseKind MapType,
8180 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8181 bool ReturnDevicePointer, bool IsImplicit) {
8182 const ValueDecl *VD =
8183 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8184 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8185 IsImplicit);
8186 };
8187
8188 for (const auto *C : CurMapperDir->clauselists()) {
8189 const auto *MC = cast<OMPMapClause>(C);
8190 for (const auto L : MC->component_lists()) {
8191 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8192 /*ReturnDevicePointer=*/false, MC->isImplicit());
8193 }
8194 }
8195
8196 for (const auto &M : Info) {
8197 // We need to know when we generate information for the first component
8198 // associated with a capture, because the mapping flags depend on it.
8199 bool IsFirstComponentList = true;
8200
8201 // Temporary versions of arrays
8202 MapBaseValuesArrayTy CurBasePointers;
8203 MapValuesArrayTy CurPointers;
8204 MapValuesArrayTy CurSizes;
8205 MapFlagsArrayTy CurTypes;
8206 StructRangeInfoTy PartialStruct;
8207
8208 for (const MapInfo &L : M.second) {
8209 assert(!L.Components.empty() &&
8210 "Not expecting declaration with no component lists.");
8211 generateInfoForComponentList(
8212 L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8213 CurPointers, CurSizes, CurTypes, PartialStruct,
8214 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8215 IsFirstComponentList = false;
8216 }
8217
8218 // If there is an entry in PartialStruct it means we have a struct with
8219 // individual members mapped. Emit an extra combined entry.
8220 if (PartialStruct.Base.isValid())
8221 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8222 PartialStruct);
8223
8224 // We need to append the results of this capture to what we already have.
8225 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8226 Pointers.append(CurPointers.begin(), CurPointers.end());
8227 Sizes.append(CurSizes.begin(), CurSizes.end());
8228 Types.append(CurTypes.begin(), CurTypes.end());
8229 }
8230 }
8231
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \a VD is a lambda object, emit map entries for its captured 'this'
  /// (if any) and for every variable captured by reference (or captured by
  /// copy when the variable has pointer type). Entries are appended to
  /// \a BasePointers, \a Pointers, \a Sizes and \a Types, and the mapping
  /// from each capture-field address to the enclosing lambda address is
  /// recorded in \a LambdaPointers for the later MEMBER_OF fix-up.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Nothing to do unless the captured value is a lambda object.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Map each captured variable to the closure field that stores it.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit an entry for the captured 'this' pointer; its size is the size
      // of a void pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and pointer-typed by-copy captures are
      // mapped here.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: point at the referenced storage; the size is
        // the size of the non-reference type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by copy: map the loaded pointer value with a zero
        // size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8295
8296 /// Set correct indices for lambdas captures.
adjustMemberOfForLambdaCaptures(const llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapFlagsArrayTy & Types) const8297 void adjustMemberOfForLambdaCaptures(
8298 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8299 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8300 MapFlagsArrayTy &Types) const {
8301 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8302 // Set correct member_of idx for all implicit lambda captures.
8303 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8304 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8305 continue;
8306 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8307 assert(BasePtr && "Unable to find base lambda address.");
8308 int TgtIdx = -1;
8309 for (unsigned J = I; J > 0; --J) {
8310 unsigned Idx = J - 1;
8311 if (Pointers[Idx] != BasePtr)
8312 continue;
8313 TgtIdx = Idx;
8314 break;
8315 }
8316 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8317 // All other current entries will be MEMBER_OF the combined entry
8318 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8319 // 0xFFFF in the MEMBER_OF field).
8320 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8321 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8322 }
8323 }
8324
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to the given capture \a Cap whose captured value is \a Arg.
  /// Component lists from different map clauses that overlap (share a prefix
  /// of components) are detected and emitted together; struct-combining
  /// information is accumulated in \a PartialStruct.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A capture of 'this' maps to a null declaration; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every component list of every map clause that refers to VD,
    // together with the clause's map type, modifiers and implicit flag.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by a pointer into DeclComponentLists (stable for the
    // lifetime of this function); the value holds the overlapped sub-lists.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L only against later entries; earlier pairs were already
      // handled in previous iterations. Note that the std::tie below reuses
      // MapType/MapModifiers/IsImplicit as scratch; only Components1 matters.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists backwards (base towards leaves) while the
        // components keep matching in kind and referenced declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter (exhausted) list is the base; the other is recorded
          // as one of its overlapped sub-lists.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // For C++ records use the plain (flattened) layout; otherwise the
      // record's own field order.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order the overlapped component lists by the position of the first
      // differing field; equal-prefix lists order shorter-first is false,
      // longer lists compare by field index / layout position.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise compare the first differing fields: same parent
            // record -> by field index; different records -> by which field
            // appears first in the computed layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
          Types, PartialStruct, IsFirstComponentList, IsImplicit,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8502
8503 /// Generate the base pointers, section pointers, sizes and map types
8504 /// associated with the declare target link variables.
generateInfoForDeclareTargetLink(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const8505 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8506 MapValuesArrayTy &Pointers,
8507 MapValuesArrayTy &Sizes,
8508 MapFlagsArrayTy &Types) const {
8509 assert(CurDir.is<const OMPExecutableDirective *>() &&
8510 "Expect a executable directive");
8511 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8512 // Map other list items in the map clause which are not captured variables
8513 // but "declare target link" global variables.
8514 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8515 for (const auto L : C->component_lists()) {
8516 if (!L.first)
8517 continue;
8518 const auto *VD = dyn_cast<VarDecl>(L.first);
8519 if (!VD)
8520 continue;
8521 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8523 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8524 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8525 continue;
8526 StructRangeInfoTy PartialStruct;
8527 generateInfoForComponentList(
8528 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8529 Pointers, Sizes, Types, PartialStruct,
8530 /*IsFirstComponentList=*/true, C->isImplicit());
8531 assert(!PartialStruct.Base.isValid() &&
8532 "No partial structs for declare target link expected.");
8533 }
8534 }
8535 }
8536
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV. Exactly one
  /// entry is appended to each of the Cur* arrays, flagged as a target
  /// parameter (and as implicit unless clause information says otherwise).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': size is the pointee size, map type defaults to 'tofrom'.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // Firstprivate captures take their implicit flag from the recorded
      // clause information.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variables get a registered global copy that
        // is initialized here with the value of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: pass the loaded
          // pointer value rather than the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8620 };
8621 } // anonymous namespace
8622
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills \a Info with: a base-pointer array and a pointer array (temporaries
/// stored element by element), a sizes array (constant global when all sizes
/// are compile-time constants, otherwise a temporary filled at runtime) and
/// a constant global holding the map-type flags.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers and pointers always live in temporaries that are filled
    // element by element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (when runtime-evaluated) size into
    // its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where device-pointer declarations were stored so callers can
      // look up their addresses later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8741
8742 /// Emit the arguments to be passed to the runtime library based on the
8743 /// arrays of pointers, sizes and map types.
emitOffloadingArraysArgument(CodeGenFunction & CGF,llvm::Value * & BasePointersArrayArg,llvm::Value * & PointersArrayArg,llvm::Value * & SizesArrayArg,llvm::Value * & MapTypesArrayArg,CGOpenMPRuntime::TargetDataInfo & Info)8744 static void emitOffloadingArraysArgument(
8745 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8746 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8747 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8748 CodeGenModule &CGM = CGF.CGM;
8749 if (Info.NumberOfPtrs) {
8750 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8751 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8752 Info.BasePointersArray,
8753 /*Idx0=*/0, /*Idx1=*/0);
8754 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8755 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8756 Info.PointersArray,
8757 /*Idx0=*/0,
8758 /*Idx1=*/0);
8759 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8760 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8761 /*Idx0=*/0, /*Idx1=*/0);
8762 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8763 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8764 Info.MapTypesArray,
8765 /*Idx0=*/0,
8766 /*Idx1=*/0);
8767 } else {
8768 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8769 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8770 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8771 MapTypesArrayArg =
8772 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8773 }
8774 }
8775
8776 /// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Walk into the captured body of \p D, skipping implicit captured-statement
  // and container wrappers, to find the single nested statement (if any).
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may directly contain a distribute-based construct, or a
      // 'teams' construct that in turn contains one; look one level deeper
      // in the latter case.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' can only nest a distribute-based construct directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot contain a nested 'distribute'.
      return nullptr;
    // Every other directive kind is either already a combined
    // target+teams+distribute form or is not a target construct at all, so
    // this helper must never be called on it.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  // No nested executable directive found.
  return nullptr;
}
8882
8883 /// Emit the user-defined mapper function. The code generation follows the
8884 /// pattern in the example below.
8885 /// \code
8886 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8887 /// void *base, void *begin,
8888 /// int64_t size, int64_t type) {
8889 /// // Allocate space for an array section first.
8890 /// if (size > 1 && !maptype.IsDelete)
8891 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8892 /// size*sizeof(Ty), clearToFrom(type));
8893 /// // Map members.
8894 /// for (unsigned i = 0; i < size; i++) {
8895 /// // For each component specified by this mapper:
8896 /// for (auto c : all_components) {
8897 /// if (c.hasMapper())
8898 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8899 /// c.arg_type);
8900 /// else
8901 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8902 /// c.arg_begin, c.arg_size, c.arg_type);
8903 /// }
8904 /// }
8905 /// // Delete the array section.
8906 /// if (size > 1 && maptype.IsDelete)
8907 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8908 /// size*sizeof(Ty), clearToFrom(type));
8909 /// }
8910 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper function is emitted at most once; subsequent requests reuse
  // the cached function.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The mapper's declared variable ('declare mapper(id: type var)'), which is
  // privatized below to refer to each array element in turn.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the pattern documented above: (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper id>." so distinct
  // mappers for the same type do not collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even at -O0-style function attrs.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through Size elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. PtrPHI tracks the current element; its second
  // incoming value (the incremented pointer) is added at the loop latch below.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function, and remember it per enclosing function when
  // one is provided (so it can be deferred/cleaned up with that function).
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9163
9164 /// Emit the array initialization or deletion portion for user-defined mapper
9165 /// code generation. First, it evaluates whether an array section is mapped and
9166 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9167 /// true, and \a MapType indicates to not delete this array, array
9168 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Prefix used in the basic-block and value names emitted below, so the
  // init and delete instantiations are distinguishable in the IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section: only sections with more than one
  // element need a separate allocation/deletion component. Scalars branch
  // straight to \p ExitBB.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. For the init case the
  // component is emitted only when DELETE is absent; for the delete case,
  // only when DELETE is present.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9219
emitTargetNumIterationsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Value * DeviceID,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)9220 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9221 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9222 llvm::Value *DeviceID,
9223 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9224 const OMPLoopDirective &D)>
9225 SizeEmitter) {
9226 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9227 const OMPExecutableDirective *TD = &D;
9228 // Get nested teams distribute kind directive, if any.
9229 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9230 TD = getNestedDistributeDirective(CGM.getContext(), D);
9231 if (!TD)
9232 return;
9233 const auto *LD = cast<OMPLoopDirective>(TD);
9234 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9235 PrePostActionTy &) {
9236 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9237 llvm::Value *Args[] = {DeviceID, NumIterations};
9238 CGF.EmitRuntimeCall(
9239 OMPBuilder.getOrCreateRuntimeFunction(
9240 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9241 Args);
9242 }
9243 };
9244 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9245 }
9246
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Function * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)9247 void CGOpenMPRuntime::emitTargetCall(
9248 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9249 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9250 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9251 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9252 const OMPLoopDirective &D)>
9253 SizeEmitter) {
9254 if (!CGF.HaveInsertPoint())
9255 return;
9256
9257 assert(OutlinedFn && "Invalid outlined function!");
9258
9259 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9260 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9261 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9262 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9263 PrePostActionTy &) {
9264 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9265 };
9266 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9267
9268 CodeGenFunction::OMPTargetDataInfo InputInfo;
9269 llvm::Value *MapTypesArray = nullptr;
9270 // Fill up the pointer arrays and transfer execution to the device.
9271 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9272 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9273 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9274 if (Device.getInt() == OMPC_DEVICE_ancestor) {
9275 // Reverse offloading is not supported, so just execute on the host.
9276 if (RequiresOuterTask) {
9277 CapturedVars.clear();
9278 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9279 }
9280 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9281 return;
9282 }
9283
9284 // On top of the arrays that were filled up, the target offloading call
9285 // takes as arguments the device id as well as the host pointer. The host
9286 // pointer is used by the runtime library to identify the current target
9287 // region, so it only has to be unique and not necessarily point to
9288 // anything. It could be the pointer to the outlined function that
9289 // implements the target region, but we aren't using that so that the
9290 // compiler doesn't need to keep that, and could therefore inline the host
9291 // function if proven worthwhile during optimization.
9292
9293 // From this point on, we need to have an ID of the target region defined.
9294 assert(OutlinedFnID && "Invalid outlined function ID!");
9295
9296 // Emit device ID if any.
9297 llvm::Value *DeviceID;
9298 if (Device.getPointer()) {
9299 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9300 Device.getInt() == OMPC_DEVICE_device_num) &&
9301 "Expected device_num modifier.");
9302 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9303 DeviceID =
9304 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9305 } else {
9306 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9307 }
9308
9309 // Emit the number of elements in the offloading arrays.
9310 llvm::Value *PointerNum =
9311 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9312
9313 // Return value of the runtime offloading call.
9314 llvm::Value *Return;
9315
9316 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9317 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9318
9319 // Emit tripcount for the target loop-based directive.
9320 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9321
9322 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9323 // The target region is an outlined function launched by the runtime
9324 // via calls __tgt_target() or __tgt_target_teams().
9325 //
9326 // __tgt_target() launches a target region with one team and one thread,
9327 // executing a serial region. This master thread may in turn launch
9328 // more threads within its team upon encountering a parallel region,
9329 // however, no additional teams can be launched on the device.
9330 //
9331 // __tgt_target_teams() launches a target region with one or more teams,
9332 // each with one or more threads. This call is required for target
9333 // constructs such as:
9334 // 'target teams'
9335 // 'target' / 'teams'
9336 // 'target teams distribute parallel for'
9337 // 'target parallel'
9338 // and so on.
9339 //
9340 // Note that on the host and CPU targets, the runtime implementation of
9341 // these calls simply call the outlined function without forking threads.
9342 // The outlined functions themselves have runtime calls to
9343 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9344 // the compiler in emitTeamsCall() and emitParallelCall().
9345 //
9346 // In contrast, on the NVPTX target, the implementation of
9347 // __tgt_target_teams() launches a GPU kernel with the requested number
9348 // of teams and threads so no additional calls to the runtime are required.
9349 if (NumTeams) {
9350 // If we have NumTeams defined this means that we have an enclosed teams
9351 // region. Therefore we also expect to have NumThreads defined. These two
9352 // values should be defined in the presence of a teams directive,
9353 // regardless of having any clauses associated. If the user is using teams
9354 // but no clauses, these two values will be the default that should be
9355 // passed to the runtime library - a 32-bit integer with the value zero.
9356 assert(NumThreads && "Thread limit expression should be available along "
9357 "with number of teams.");
9358 llvm::Value *OffloadingArgs[] = {DeviceID,
9359 OutlinedFnID,
9360 PointerNum,
9361 InputInfo.BasePointersArray.getPointer(),
9362 InputInfo.PointersArray.getPointer(),
9363 InputInfo.SizesArray.getPointer(),
9364 MapTypesArray,
9365 NumTeams,
9366 NumThreads};
9367 Return = CGF.EmitRuntimeCall(
9368 OMPBuilder.getOrCreateRuntimeFunction(
9369 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
9370 : OMPRTL___tgt_target_teams),
9371 OffloadingArgs);
9372 } else {
9373 llvm::Value *OffloadingArgs[] = {DeviceID,
9374 OutlinedFnID,
9375 PointerNum,
9376 InputInfo.BasePointersArray.getPointer(),
9377 InputInfo.PointersArray.getPointer(),
9378 InputInfo.SizesArray.getPointer(),
9379 MapTypesArray};
9380 Return = CGF.EmitRuntimeCall(
9381 OMPBuilder.getOrCreateRuntimeFunction(
9382 CGM.getModule(),
9383 HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
9384 OffloadingArgs);
9385 }
9386
9387 // Check the error code and execute the host version if required.
9388 llvm::BasicBlock *OffloadFailedBlock =
9389 CGF.createBasicBlock("omp_offload.failed");
9390 llvm::BasicBlock *OffloadContBlock =
9391 CGF.createBasicBlock("omp_offload.cont");
9392 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9393 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9394
9395 CGF.EmitBlock(OffloadFailedBlock);
9396 if (RequiresOuterTask) {
9397 CapturedVars.clear();
9398 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9399 }
9400 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9401 CGF.EmitBranch(OffloadContBlock);
9402
9403 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9404 };
9405
9406 // Notify that the host version must be executed.
9407 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9408 RequiresOuterTask](CodeGenFunction &CGF,
9409 PrePostActionTy &) {
9410 if (RequiresOuterTask) {
9411 CapturedVars.clear();
9412 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9413 }
9414 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9415 };
9416
9417 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9418 &CapturedVars, RequiresOuterTask,
9419 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9420 // Fill up the arrays with all the captured variables.
9421 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9422 MappableExprsHandler::MapValuesArrayTy Pointers;
9423 MappableExprsHandler::MapValuesArrayTy Sizes;
9424 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9425
9426 // Get mappable expression information.
9427 MappableExprsHandler MEHandler(D, CGF);
9428 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9429
9430 auto RI = CS.getCapturedRecordDecl()->field_begin();
9431 auto CV = CapturedVars.begin();
9432 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9433 CE = CS.capture_end();
9434 CI != CE; ++CI, ++RI, ++CV) {
9435 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9436 MappableExprsHandler::MapValuesArrayTy CurPointers;
9437 MappableExprsHandler::MapValuesArrayTy CurSizes;
9438 MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9439 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9440
9441 // VLA sizes are passed to the outlined region by copy and do not have map
9442 // information associated.
9443 if (CI->capturesVariableArrayType()) {
9444 CurBasePointers.push_back(*CV);
9445 CurPointers.push_back(*CV);
9446 CurSizes.push_back(CGF.Builder.CreateIntCast(
9447 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9448 // Copy to the device as an argument. No need to retrieve it.
9449 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9450 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9451 MappableExprsHandler::OMP_MAP_IMPLICIT);
9452 } else {
9453 // If we have any information in the map clause, we use it, otherwise we
9454 // just do a default mapping.
9455 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9456 CurSizes, CurMapTypes, PartialStruct);
9457 if (CurBasePointers.empty())
9458 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9459 CurPointers, CurSizes, CurMapTypes);
9460 // Generate correct mapping for variables captured by reference in
9461 // lambdas.
9462 if (CI->capturesVariable())
9463 MEHandler.generateInfoForLambdaCaptures(
9464 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9465 CurMapTypes, LambdaPointers);
9466 }
9467 // We expect to have at least an element of information for this capture.
9468 assert(!CurBasePointers.empty() &&
9469 "Non-existing map pointer for capture!");
9470 assert(CurBasePointers.size() == CurPointers.size() &&
9471 CurBasePointers.size() == CurSizes.size() &&
9472 CurBasePointers.size() == CurMapTypes.size() &&
9473 "Inconsistent map information sizes!");
9474
9475 // If there is an entry in PartialStruct it means we have a struct with
9476 // individual members mapped. Emit an extra combined entry.
9477 if (PartialStruct.Base.isValid())
9478 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9479 CurMapTypes, PartialStruct);
9480
9481 // We need to append the results of this capture to what we already have.
9482 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9483 Pointers.append(CurPointers.begin(), CurPointers.end());
9484 Sizes.append(CurSizes.begin(), CurSizes.end());
9485 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9486 }
9487 // Adjust MEMBER_OF flags for the lambdas captures.
9488 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9489 Pointers, MapTypes);
9490 // Map other list items in the map clause which are not captured variables
9491 // but "declare target link" global variables.
9492 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9493 MapTypes);
9494
9495 TargetDataInfo Info;
9496 // Fill up the arrays and create the arguments.
9497 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9498 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9499 Info.PointersArray, Info.SizesArray,
9500 Info.MapTypesArray, Info);
9501 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9502 InputInfo.BasePointersArray =
9503 Address(Info.BasePointersArray, CGM.getPointerAlign());
9504 InputInfo.PointersArray =
9505 Address(Info.PointersArray, CGM.getPointerAlign());
9506 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9507 MapTypesArray = Info.MapTypesArray;
9508 if (RequiresOuterTask)
9509 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9510 else
9511 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9512 };
9513
9514 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9515 CodeGenFunction &CGF, PrePostActionTy &) {
9516 if (RequiresOuterTask) {
9517 CodeGenFunction::OMPTargetDataInfo InputInfo;
9518 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9519 } else {
9520 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9521 }
9522 };
9523
9524 // If we have a target function ID it means that we need to support
9525 // offloading, otherwise, just execute on the host. We need to execute on host
9526 // regardless of the conditional in the if clause if, e.g., the user do not
9527 // specify target triples.
9528 if (OutlinedFnID) {
9529 if (IfCond) {
9530 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9531 } else {
9532 RegionCodeGenTy ThenRCG(TargetThenGen);
9533 ThenRCG(CGF);
9534 }
9535 } else {
9536 RegionCodeGenTy ElseRCG(TargetElseGen);
9537 ElseRCG(CGF);
9538 }
9539 }
9540
/// Recursively scans \p S for OpenMP target directives and emits the device
/// functions for every target region found. \p ParentName is the mangled name
/// of the enclosing host function; it is part of the unique target-region
/// entry key (device-id, file-id, parent name, line).
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Build the unique entry key for this target region from its source
    // location.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target-execution directives, so
    // reaching any of them here indicates a bug in the dispatcher above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directives: recurse into the associated statement
  // (if any) to find nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9689
emitTargetFunctions(GlobalDecl GD)9690 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9691 // If emitting code for the host, we do not process FD here. Instead we do
9692 // the normal code generation.
9693 if (!CGM.getLangOpts().OpenMPIsDevice) {
9694 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9695 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9696 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9697 // Do not emit device_type(nohost) functions for the host.
9698 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9699 return true;
9700 }
9701 return false;
9702 }
9703
9704 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9705 // Try to detect target regions in the function.
9706 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9707 StringRef Name = CGM.getMangledName(GD);
9708 scanForTargetRegionsFunctions(FD->getBody(), Name);
9709 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9710 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9711 // Do not emit device_type(nohost) functions for the host.
9712 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9713 return true;
9714 }
9715
9716 // Do not to emit function if it is not marked as declare target.
9717 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9718 AlreadyEmittedTargetDecls.count(VD) == 0;
9719 }
9720
emitTargetGlobalVariable(GlobalDecl GD)9721 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9722 if (!CGM.getLangOpts().OpenMPIsDevice)
9723 return false;
9724
9725 // Check if there are Ctors/Dtors in this declaration and look for target
9726 // regions in it. We use the complete variant to produce the kernel name
9727 // mangling.
9728 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9729 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9730 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9731 StringRef ParentName =
9732 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9733 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9734 }
9735 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9736 StringRef ParentName =
9737 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9738 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9739 }
9740 }
9741
9742 // Do not to emit variable if it is not marked as declare target.
9743 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9744 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9745 cast<VarDecl>(GD.getDecl()));
9746 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9747 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9748 HasRequiresUnifiedSharedMemory)) {
9749 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9750 return true;
9751 }
9752 return false;
9753 }
9754
9755 llvm::Constant *
registerTargetFirstprivateCopy(CodeGenFunction & CGF,const VarDecl * VD)9756 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9757 const VarDecl *VD) {
9758 assert(VD->getType().isConstant(CGM.getContext()) &&
9759 "Expected constant variable.");
9760 StringRef VarName;
9761 llvm::Constant *Addr;
9762 llvm::GlobalValue::LinkageTypes Linkage;
9763 QualType Ty = VD->getType();
9764 SmallString<128> Buffer;
9765 {
9766 unsigned DeviceID;
9767 unsigned FileID;
9768 unsigned Line;
9769 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9770 FileID, Line);
9771 llvm::raw_svector_ostream OS(Buffer);
9772 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9773 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9774 VarName = OS.str();
9775 }
9776 Linkage = llvm::GlobalValue::InternalLinkage;
9777 Addr =
9778 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9779 getDefaultFirstprivateAddressSpace());
9780 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9781 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9782 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9783 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9784 VarName, Addr, VarSize,
9785 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9786 return Addr;
9787 }
9788
registerTargetGlobalVariable(const VarDecl * VD,llvm::Constant * Addr)9789 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9790 llvm::Constant *Addr) {
9791 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9792 !CGM.getLangOpts().OpenMPIsDevice)
9793 return;
9794 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9795 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9796 if (!Res) {
9797 if (CGM.getLangOpts().OpenMPIsDevice) {
9798 // Register non-target variables being emitted in device code (debug info
9799 // may cause this).
9800 StringRef VarName = CGM.getMangledName(VD);
9801 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9802 }
9803 return;
9804 }
9805 // Register declare target variables.
9806 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9807 StringRef VarName;
9808 CharUnits VarSize;
9809 llvm::GlobalValue::LinkageTypes Linkage;
9810
9811 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9812 !HasRequiresUnifiedSharedMemory) {
9813 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9814 VarName = CGM.getMangledName(VD);
9815 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9816 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9817 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9818 } else {
9819 VarSize = CharUnits::Zero();
9820 }
9821 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9822 // Temp solution to prevent optimizations of the internal variables.
9823 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9824 std::string RefName = getName({VarName, "ref"});
9825 if (!CGM.GetGlobalValue(RefName)) {
9826 llvm::Constant *AddrRef =
9827 getOrCreateInternalVariable(Addr->getType(), RefName);
9828 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9829 GVAddrRef->setConstant(/*Val=*/true);
9830 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9831 GVAddrRef->setInitializer(Addr);
9832 CGM.addCompilerUsedGlobal(GVAddrRef);
9833 }
9834 }
9835 } else {
9836 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9837 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9838 HasRequiresUnifiedSharedMemory)) &&
9839 "Declare target attribute must link or to with unified memory.");
9840 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9841 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9842 else
9843 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9844
9845 if (CGM.getLangOpts().OpenMPIsDevice) {
9846 VarName = Addr->getName();
9847 Addr = nullptr;
9848 } else {
9849 VarName = getAddrOfDeclareTargetVar(VD).getName();
9850 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9851 }
9852 VarSize = CGM.getPointerSize();
9853 Linkage = llvm::GlobalValue::WeakAnyLinkage;
9854 }
9855
9856 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9857 VarName, Addr, VarSize, Flags, Linkage);
9858 }
9859
emitTargetGlobal(GlobalDecl GD)9860 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9861 if (isa<FunctionDecl>(GD.getDecl()) ||
9862 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9863 return emitTargetFunctions(GD);
9864
9865 return emitTargetGlobalVariable(GD);
9866 }
9867
emitDeferredTargetDecls() const9868 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9869 for (const VarDecl *VD : DeferredGlobalVariables) {
9870 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9871 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9872 if (!Res)
9873 continue;
9874 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9875 !HasRequiresUnifiedSharedMemory) {
9876 CGM.EmitGlobal(VD);
9877 } else {
9878 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9879 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9880 HasRequiresUnifiedSharedMemory)) &&
9881 "Expected link clause or to clause with unified memory.");
9882 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9883 }
9884 }
9885 }
9886
adjustTargetSpecificDataForLambdas(CodeGenFunction & CGF,const OMPExecutableDirective & D) const9887 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9888 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9889 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9890 " Expected target-based directive.");
9891 }
9892
processRequiresDirective(const OMPRequiresDecl * D)9893 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9894 for (const OMPClause *Clause : D->clauselists()) {
9895 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9896 HasRequiresUnifiedSharedMemory = true;
9897 } else if (const auto *AC =
9898 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9899 switch (AC->getAtomicDefaultMemOrderKind()) {
9900 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9901 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9902 break;
9903 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9904 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9905 break;
9906 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9907 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9908 break;
9909 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9910 break;
9911 }
9912 }
9913 }
9914 }
9915
/// Returns the default atomic ordering for 'atomic' constructs, as recorded
/// from any 'requires atomic_default_mem_order' clause seen by
/// processRequiresDirective.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9919
/// Checks whether \p VD carries an OMPAllocateDeclAttr and, if so, reports via
/// \p AS the language address space the variable should be emitted in.
/// \returns true when the attribute is present and \p AS was set.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  // Without the attribute the caller uses the normal emission path.
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    // All predefined allocators map to the default address space in this
    // (host) implementation; device runtimes may override.
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}
9944
/// Returns true if a 'requires unified_shared_memory' clause was seen in this
/// translation unit.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9948
DisableAutoDeclareTargetRAII(CodeGenModule & CGM)9949 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9950 CodeGenModule &CGM)
9951 : CGM(CGM) {
9952 if (CGM.getLangOpts().OpenMPIsDevice) {
9953 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9954 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9955 }
9956 }
9957
~DisableAutoDeclareTargetRAII()9958 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9959 if (CGM.getLangOpts().OpenMPIsDevice)
9960 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9961 }
9962
/// Decides whether the function denoted by \p GD should be treated as already
/// handled for device codegen. \returns true when the caller may skip it,
/// false when it still has to be emitted; also records first-time sightings of
/// non-declare-target functions.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Outside device compilation (or with marking disabled) everything is
  // treated as already handled.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If the module already holds a definition, there is nothing left to do.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First sighting inserts the declaration and requests emission; later
  // sightings are skipped.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
9982
/// Builds the global-initializer-style function that reports the effects of
/// '#pragma omp requires' to the offloading runtime via
/// __tgt_register_requires. \returns the function, or nullptr when no
/// registration is needed (no target triples, simd-only mode, device
/// compilation, or no target entries/regions in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Forward the accumulated flags to the device runtime.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10024
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)10025 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10026 const OMPExecutableDirective &D,
10027 SourceLocation Loc,
10028 llvm::Function *OutlinedFn,
10029 ArrayRef<llvm::Value *> CapturedVars) {
10030 if (!CGF.HaveInsertPoint())
10031 return;
10032
10033 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10034 CodeGenFunction::RunCleanupsScope Scope(CGF);
10035
10036 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10037 llvm::Value *Args[] = {
10038 RTLoc,
10039 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10040 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10041 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10042 RealArgs.append(std::begin(Args), std::end(Args));
10043 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10044
10045 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10046 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10047 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10048 }
10049
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)10050 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10051 const Expr *NumTeams,
10052 const Expr *ThreadLimit,
10053 SourceLocation Loc) {
10054 if (!CGF.HaveInsertPoint())
10055 return;
10056
10057 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10058
10059 llvm::Value *NumTeamsVal =
10060 NumTeams
10061 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10062 CGF.CGM.Int32Ty, /* isSigned = */ true)
10063 : CGF.Builder.getInt32(0);
10064
10065 llvm::Value *ThreadLimitVal =
10066 ThreadLimit
10067 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10068 CGF.CGM.Int32Ty, /* isSigned = */ true)
10069 : CGF.Builder.getInt32(0);
10070
10071 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10072 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10073 ThreadLimitVal};
10074 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10075 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10076 PushNumTeamsArgs);
10077 }
10078
/// Emits the runtime calls bracketing a 'target data' region:
/// __tgt_target_data_begin before the body and __tgt_target_data_end after,
/// honoring the 'if' and 'device' clauses. When device pointers must be
/// privatized, the body is emitted twice (with and without privatization);
/// otherwise it is emitted once between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, PointerNum, BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // Info was filled by BeginThenGen; it must hold the same arrays here.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, PointerNum, BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (guarded by the 'if' clause when present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, mirroring the opening above.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10207
/// Emits the runtime call implementing a standalone target data directive:
/// 'target enter data', 'target exit data' or 'target update'.  The
/// offloading arrays are built from the directive's map clauses and then
/// passed to the matching __tgt_target_data_* entry point (the _nowait
/// variant when a 'nowait' clause is present).
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Codegen that performs the actual runtime call.  It is invoked only after
  // the offloading arrays referenced through InputInfo/MapTypesArray have
  // been filled in (see TargetThenGen below).
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    // All remaining directive kinds are rejected up front by the assert
    // above; list them explicitly so the switch stays fully covered.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses, publishes them via
  // InputInfo/MapTypesArray, and then emits ThenGen (as a task when 'depend'
  // clauses are present, inline otherwise).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the runtime call is guarded; when the condition is
  // false nothing is emitted (the else branch is empty).
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10373
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
/// LinearWithVarStride: 'linear' whose stride is another parameter
/// (ParamAttrTy::StrideOrArg holds that parameter's position);
/// Linear: 'linear' with a constant step; Uniform: from the 'uniform'
/// clause; Vector: the default classification.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Classification of the parameter; defaults to Vector.
  ParamKindTy Kind = Vector;
  // Constant linear step, or the position of the stride parameter when
  // Kind == LinearWithVarStride (see emitDeclareSimdFunction).
  llvm::APSInt StrideOrArg;
  // Alignment from the 'aligned' clause, or the target's default SIMD
  // alignment; zero when the parameter is not aligned.
  llvm::APSInt Alignment;
};
} // namespace
10384
evaluateCDTSize(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10385 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10386 ArrayRef<ParamAttrTy> ParamAttrs) {
10387 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10388 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10389 // of that clause. The VLEN value must be power of 2.
10390 // In other case the notion of the function`s "characteristic data type" (CDT)
10391 // is used to compute the vector length.
10392 // CDT is defined in the following order:
10393 // a) For non-void function, the CDT is the return type.
10394 // b) If the function has any non-uniform, non-linear parameters, then the
10395 // CDT is the type of the first such parameter.
10396 // c) If the CDT determined by a) or b) above is struct, union, or class
10397 // type which is pass-by-value (except for the type that maps to the
10398 // built-in complex data type), the characteristic data type is int.
10399 // d) If none of the above three cases is applicable, the CDT is int.
10400 // The VLEN is then determined based on the CDT and the size of vector
10401 // register of that ISA for which current vector version is generated. The
10402 // VLEN is computed using the formula below:
10403 // VLEN = sizeof(vector_register) / sizeof(CDT),
10404 // where vector register size specified in section 3.2.1 Registers and the
10405 // Stack Frame of original AMD64 ABI document.
10406 QualType RetType = FD->getReturnType();
10407 if (RetType.isNull())
10408 return 0;
10409 ASTContext &C = FD->getASTContext();
10410 QualType CDT;
10411 if (!RetType.isNull() && !RetType->isVoidType()) {
10412 CDT = RetType;
10413 } else {
10414 unsigned Offset = 0;
10415 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10416 if (ParamAttrs[Offset].Kind == Vector)
10417 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10418 ++Offset;
10419 }
10420 if (CDT.isNull()) {
10421 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10422 if (ParamAttrs[I + Offset].Kind == Vector) {
10423 CDT = FD->getParamDecl(I)->getType();
10424 break;
10425 }
10426 }
10427 }
10428 }
10429 if (CDT.isNull())
10430 CDT = C.IntTy;
10431 CDT = CDT->getCanonicalTypeUnqualified();
10432 if (CDT->isRecordType() || CDT->isUnionType())
10433 CDT = C.IntTy;
10434 return C.getTypeSize(CDT);
10435 }
10436
10437 static void
emitX86DeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn,const llvm::APSInt & VLENVal,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State)10438 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10439 const llvm::APSInt &VLENVal,
10440 ArrayRef<ParamAttrTy> ParamAttrs,
10441 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10442 struct ISADataTy {
10443 char ISA;
10444 unsigned VecRegSize;
10445 };
10446 ISADataTy ISAData[] = {
10447 {
10448 'b', 128
10449 }, // SSE
10450 {
10451 'c', 256
10452 }, // AVX
10453 {
10454 'd', 256
10455 }, // AVX2
10456 {
10457 'e', 512
10458 }, // AVX512
10459 };
10460 llvm::SmallVector<char, 2> Masked;
10461 switch (State) {
10462 case OMPDeclareSimdDeclAttr::BS_Undefined:
10463 Masked.push_back('N');
10464 Masked.push_back('M');
10465 break;
10466 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10467 Masked.push_back('N');
10468 break;
10469 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10470 Masked.push_back('M');
10471 break;
10472 }
10473 for (char Mask : Masked) {
10474 for (const ISADataTy &Data : ISAData) {
10475 SmallString<256> Buffer;
10476 llvm::raw_svector_ostream Out(Buffer);
10477 Out << "_ZGV" << Data.ISA << Mask;
10478 if (!VLENVal) {
10479 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10480 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10481 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10482 } else {
10483 Out << VLENVal;
10484 }
10485 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10486 switch (ParamAttr.Kind){
10487 case LinearWithVarStride:
10488 Out << 's' << ParamAttr.StrideOrArg;
10489 break;
10490 case Linear:
10491 Out << 'l';
10492 if (ParamAttr.StrideOrArg != 1)
10493 Out << ParamAttr.StrideOrArg;
10494 break;
10495 case Uniform:
10496 Out << 'u';
10497 break;
10498 case Vector:
10499 Out << 'v';
10500 break;
10501 }
10502 if (!!ParamAttr.Alignment)
10503 Out << 'a' << ParamAttr.Alignment;
10504 }
10505 Out << '_' << Fn->getName();
10506 Fn->addFnAttr(Out.str());
10507 }
10508 }
10509 }
10510
// These are the functions that are needed to mangle the name of the
10512 // vector functions generated by the compiler, according to the rules
10513 // defined in the "Vector Function ABI specifications for AArch64",
10514 // available at
10515 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10516
10517 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10518 ///
10519 /// TODO: Need to implement the behavior for reference marked with a
10520 /// var or no linear modifiers (1.b in the section). For this, we
10521 /// need to extend ParamKindTy to support the linear modifiers.
getAArch64MTV(QualType QT,ParamKindTy Kind)10522 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10523 QT = QT.getCanonicalType();
10524
10525 if (QT->isVoidType())
10526 return false;
10527
10528 if (Kind == ParamKindTy::Uniform)
10529 return false;
10530
10531 if (Kind == ParamKindTy::Linear)
10532 return false;
10533
10534 // TODO: Handle linear references with modifiers
10535
10536 if (Kind == ParamKindTy::LinearWithVarStride)
10537 return false;
10538
10539 return true;
10540 }
10541
10542 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
getAArch64PBV(QualType QT,ASTContext & C)10543 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10544 QT = QT.getCanonicalType();
10545 unsigned Size = C.getTypeSize(QT);
10546
10547 // Only scalars and complex within 16 bytes wide set PVB to true.
10548 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10549 return false;
10550
10551 if (QT->isFloatingType())
10552 return true;
10553
10554 if (QT->isIntegerType())
10555 return true;
10556
10557 if (QT->isPointerType())
10558 return true;
10559
10560 // TODO: Add support for complex types (section 3.1.2, item 2).
10561
10562 return false;
10563 }
10564
10565 /// Computes the lane size (LS) of a return type or of an input parameter,
10566 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10567 /// TODO: Add support for references, section 3.2.1, item 1.
getAArch64LS(QualType QT,ParamKindTy Kind,ASTContext & C)10568 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10569 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10570 QualType PTy = QT.getCanonicalType()->getPointeeType();
10571 if (getAArch64PBV(PTy, C))
10572 return C.getTypeSize(PTy);
10573 }
10574 if (getAArch64PBV(QT, C))
10575 return C.getTypeSize(QT);
10576
10577 return C.getTypeSize(C.getUIntPtrType());
10578 }
10579
10580 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10581 // signature of the scalar function, as defined in 3.2.2 of the
10582 // AAVFABI.
10583 static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10584 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10585 QualType RetType = FD->getReturnType().getCanonicalType();
10586
10587 ASTContext &C = FD->getASTContext();
10588
10589 bool OutputBecomesInput = false;
10590
10591 llvm::SmallVector<unsigned, 8> Sizes;
10592 if (!RetType->isVoidType()) {
10593 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10594 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10595 OutputBecomesInput = true;
10596 }
10597 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10598 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10599 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10600 }
10601
10602 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10603 // The LS of a function parameter / return value can only be a power
10604 // of 2, starting from 8 bits, up to 128.
10605 assert(std::all_of(Sizes.begin(), Sizes.end(),
10606 [](unsigned Size) {
10607 return Size == 8 || Size == 16 || Size == 32 ||
10608 Size == 64 || Size == 128;
10609 }) &&
10610 "Invalid size");
10611
10612 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10613 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10614 OutputBecomesInput);
10615 }
10616
10617 /// Mangle the parameter part of the vector function name according to
10618 /// their OpenMP classification. The mangling function is defined in
10619 /// section 3.5 of the AAVFABI.
mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs)10620 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10621 SmallString<256> Buffer;
10622 llvm::raw_svector_ostream Out(Buffer);
10623 for (const auto &ParamAttr : ParamAttrs) {
10624 switch (ParamAttr.Kind) {
10625 case LinearWithVarStride:
10626 Out << "ls" << ParamAttr.StrideOrArg;
10627 break;
10628 case Linear:
10629 Out << 'l';
10630 // Don't print the step value if it is not present or if it is
10631 // equal to 1.
10632 if (ParamAttr.StrideOrArg != 1)
10633 Out << ParamAttr.StrideOrArg;
10634 break;
10635 case Uniform:
10636 Out << 'u';
10637 break;
10638 case Vector:
10639 Out << 'v';
10640 break;
10641 }
10642
10643 if (!!ParamAttr.Alignment)
10644 Out << 'a' << ParamAttr.Alignment;
10645 }
10646
10647 return std::string(Out.str());
10648 }
10649
10650 // Function used to add the attribute. The parameter `VLEN` is
10651 // templated to allow the use of "x" when targeting scalable functions
10652 // for SVE.
10653 template <typename T>
addAArch64VectorName(T VLEN,StringRef LMask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10654 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10655 char ISA, StringRef ParSeq,
10656 StringRef MangledName, bool OutputBecomesInput,
10657 llvm::Function *Fn) {
10658 SmallString<256> Buffer;
10659 llvm::raw_svector_ostream Out(Buffer);
10660 Out << Prefix << ISA << LMask << VLEN;
10661 if (OutputBecomesInput)
10662 Out << "v";
10663 Out << ParSeq << "_" << MangledName;
10664 Fn->addFnAttr(Out.str());
10665 }
10666
10667 // Helper function to generate the Advanced SIMD names depending on
10668 // the value of the NDS when simdlen is not present.
addAArch64AdvSIMDNDSNames(unsigned NDS,StringRef Mask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10669 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10670 StringRef Prefix, char ISA,
10671 StringRef ParSeq, StringRef MangledName,
10672 bool OutputBecomesInput,
10673 llvm::Function *Fn) {
10674 switch (NDS) {
10675 case 8:
10676 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10677 OutputBecomesInput, Fn);
10678 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10679 OutputBecomesInput, Fn);
10680 break;
10681 case 16:
10682 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10683 OutputBecomesInput, Fn);
10684 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10685 OutputBecomesInput, Fn);
10686 break;
10687 case 32:
10688 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10689 OutputBecomesInput, Fn);
10690 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10691 OutputBecomesInput, Fn);
10692 break;
10693 case 64:
10694 case 128:
10695 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10696 OutputBecomesInput, Fn);
10697 break;
10698 default:
10699 llvm_unreachable("Scalar type is too wide.");
10700 }
10701 }
10702
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates a user-supplied 'simdlen' (\p UserVLEN, 0 if absent) against
/// the target ISA ('n' = Advanced SIMD, 's' = SVE) and then attaches one
/// "_ZGV..." name attribute to \p Fn per generated vector variant.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data); // Narrowest data size.
  const unsigned WDS = std::get<1>(Data); // Widest data size.
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits (multiples of 128 bits, at most 2048 bits overall).
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable ("x") masked variant only.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10811
/// Emits 'declare simd' vector-variant name attributes ("_ZGV...") for
/// \p Fn, visiting every OMPDeclareSimdDeclAttr on every redeclaration of
/// \p FD.  Parameter classifications (uniform/linear/aligned) are collected
/// into ParamAttrTy records and handed to the per-target emitters
/// (x86 or AArch64).
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  // For C++ methods, slot 0 stands for the implicit 'this' parameter
  // (keyed by the function decl itself); explicit parameters follow.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  // Walk the whole redeclaration chain so attributes on any declaration
  // are honored.
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One entry per parameter slot; default kind is Vector.
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        // Use the explicit alignment expression if present; otherwise the
        // target's default SIMD alignment for the type, in bytes.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          // Pointer parameters scale the step by the pointee size below.
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: record the position of the parameter that
            // carries the stride instead.
            // NOTE(review): cast<> asserts rather than returning null, so
            // these 'if' guards can never be false at runtime; dyn_cast<>
            // may have been intended — confirm the invariant holds for all
            // accepted step expressions.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate the optional 'simdlen' clause to a constant.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Dispatch to the target-specific name emitters.
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
10935
namespace {
/// Cleanup action for doacross support.  Pushed by emitDoacrossInit so that
/// the finalization runtime call is emitted when the region is exited,
/// on both normal and EH paths.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments the finalization call takes (location and
  /// thread id — see the FiniArgs built in emitDoacrossInit).
  static const int DoacrossFinArgs = 2;

private:
  // Runtime function to invoke on cleanup.
  llvm::FunctionCallee RTLFn;
  // Arguments captured at push time and replayed at emission time.
  llvm::Value *Args[DoacrossFinArgs];

public:
  /// Stores the callee and copies the (exactly DoacrossFinArgs) call
  /// arguments for later emission.
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  /// Emits the saved runtime call, unless the IR builder has no insertion
  /// point (nothing can be emitted then).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
10960
/// Emits the prologue of an OpenMP doacross loop nest: builds an on-stack
/// array of 'kmp_dim' descriptors (one per loop in \p NumIterations), calls
/// __kmpc_doacross_init with it, and pushes a cleanup that emits
/// __kmpc_doacross_fini when the region is exited.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once, then cache in KmpDimTy) the implicit record matching the
  // runtime's dimension descriptor:
  // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
  //  kmp_int64 lo; // lower
  //  kmp_int64 up; // upper
  //  kmp_int64 st; // stride
  // };
  if (KmpDimTy.isNull()) {
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero-initialize first: the 'lo' field is deliberately left at 0 below.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) to run on region exit,
  // including exceptional exits (NormalAndEHCleanup).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11031
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)11032 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11033 const OMPDependClause *C) {
11034 QualType Int64Ty =
11035 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11036 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11037 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11038 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11039 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11040 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11041 const Expr *CounterVal = C->getLoopData(I);
11042 assert(CounterVal);
11043 llvm::Value *CntVal = CGF.EmitScalarConversion(
11044 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11045 CounterVal->getExprLoc());
11046 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11047 /*Volatile=*/false, Int64Ty);
11048 }
11049 llvm::Value *Args[] = {
11050 emitUpdateLocation(CGF, C->getBeginLoc()),
11051 getThreadID(CGF, C->getBeginLoc()),
11052 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11053 llvm::FunctionCallee RTLFn;
11054 if (C->getDependencyKind() == OMPC_DEPEND_source) {
11055 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11056 OMPRTL___kmpc_doacross_post);
11057 } else {
11058 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11059 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11060 OMPRTL___kmpc_doacross_wait);
11061 }
11062 CGF.EmitRuntimeCall(RTLFn, Args);
11063 }
11064
emitCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::FunctionCallee Callee,ArrayRef<llvm::Value * > Args) const11065 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11066 llvm::FunctionCallee Callee,
11067 ArrayRef<llvm::Value *> Args) const {
11068 assert(Loc.isValid() && "Outlined function call location must be valid.");
11069 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11070
11071 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11072 if (Fn->doesNotThrow()) {
11073 CGF.EmitNounwindRuntimeCall(Fn, Args);
11074 return;
11075 }
11076 }
11077 CGF.EmitRuntimeCall(Callee, Args);
11078 }
11079
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Forward to the common call-emission helper; kept as a separate entry
  // point so the outlined-function call site can be customized independently
  // of generic runtime calls.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11085
emitFunctionProlog(CodeGenFunction & CGF,const Decl * D)11086 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11087 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11088 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11089 HasEmittedDeclareTargetRegion = true;
11090 }
11091
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Default implementation: the native parameter's local address is used
  // directly; TargetParam is ignored here.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11097
11098 namespace {
11099 /// Cleanup action for allocate support.
11100 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11101 public:
11102 static const int CleanupArgs = 3;
11103
11104 private:
11105 llvm::FunctionCallee RTLFn;
11106 llvm::Value *Args[CleanupArgs];
11107
11108 public:
OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,ArrayRef<llvm::Value * > CallArgs)11109 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11110 ArrayRef<llvm::Value *> CallArgs)
11111 : RTLFn(RTLFn) {
11112 assert(CallArgs.size() == CleanupArgs &&
11113 "Size of arguments does not match.");
11114 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11115 }
Emit(CodeGenFunction & CGF,Flags)11116 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11117 if (!CGF.HaveInsertPoint())
11118 return;
11119 CGF.EmitRuntimeCall(RTLFn, Args);
11120 }
11121 };
11122 } // namespace
11123
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  // Returns an invalid Address unless VD carries an OpenMP 'allocate'
  // attribute naming a non-default allocator. In that case storage is
  // obtained via __kmpc_alloc and a matching __kmpc_free is registered as a
  // normal-and-EH cleanup.
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified (VLA-like) type: size is a runtime value, so round
    // it up to the declared alignment with emitted integer arithmetic.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Statically-sized type: fold the aligned size to a constant now.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Register the matching __kmpc_free so the allocation is released on both
  // normal scope exit and exception unwinding.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // __kmpc_alloc returns void*; cast to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11180
NontemporalDeclsRAII(CodeGenModule & CGM,const OMPLoopDirective & S)11181 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11182 CodeGenModule &CGM, const OMPLoopDirective &S)
11183 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11184 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11185 if (!NeedToPush)
11186 return;
11187 NontemporalDeclsSet &DS =
11188 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11189 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11190 for (const Stmt *Ref : C->private_refs()) {
11191 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11192 const ValueDecl *VD;
11193 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11194 VD = DRE->getDecl();
11195 } else {
11196 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11197 assert((ME->isImplicitCXXThis() ||
11198 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11199 "Expected member of current class.");
11200 VD = ME->getMemberDecl();
11201 }
11202 DS.insert(VD);
11203 }
11204 }
11205 }
11206
~NontemporalDeclsRAII()11207 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11208 if (!NeedToPush)
11209 return;
11210 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11211 }
11212
isNontemporalDecl(const ValueDecl * VD) const11213 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11214 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11215
11216 return llvm::any_of(
11217 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11218 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11219 }
11220
tryToDisableInnerAnalysis(const OMPExecutableDirective & S,llvm::DenseSet<CanonicalDeclPtr<const Decl>> & NeedToAddForLPCsAsDisabled) const11221 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11222 const OMPExecutableDirective &S,
11223 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11224 const {
11225 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11226 // Vars in target/task regions must be excluded completely.
11227 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11228 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11229 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11230 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11231 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11232 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11233 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11234 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11235 }
11236 }
11237 // Exclude vars in private clauses.
11238 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11239 for (const Expr *Ref : C->varlists()) {
11240 if (!Ref->getType()->isScalarType())
11241 continue;
11242 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11243 if (!DRE)
11244 continue;
11245 NeedToCheckForLPCs.insert(DRE->getDecl());
11246 }
11247 }
11248 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11249 for (const Expr *Ref : C->varlists()) {
11250 if (!Ref->getType()->isScalarType())
11251 continue;
11252 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11253 if (!DRE)
11254 continue;
11255 NeedToCheckForLPCs.insert(DRE->getDecl());
11256 }
11257 }
11258 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11259 for (const Expr *Ref : C->varlists()) {
11260 if (!Ref->getType()->isScalarType())
11261 continue;
11262 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11263 if (!DRE)
11264 continue;
11265 NeedToCheckForLPCs.insert(DRE->getDecl());
11266 }
11267 }
11268 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11269 for (const Expr *Ref : C->varlists()) {
11270 if (!Ref->getType()->isScalarType())
11271 continue;
11272 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11273 if (!DRE)
11274 continue;
11275 NeedToCheckForLPCs.insert(DRE->getDecl());
11276 }
11277 }
11278 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11279 for (const Expr *Ref : C->varlists()) {
11280 if (!Ref->getType()->isScalarType())
11281 continue;
11282 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11283 if (!DRE)
11284 continue;
11285 NeedToCheckForLPCs.insert(DRE->getDecl());
11286 }
11287 }
11288 for (const Decl *VD : NeedToCheckForLPCs) {
11289 for (const LastprivateConditionalData &Data :
11290 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11291 if (Data.DeclToUniqueName.count(VD) > 0) {
11292 if (!Data.Disabled)
11293 NeedToAddForLPCsAsDisabled.insert(VD);
11294 break;
11295 }
11296 }
11297 }
11298 }
11299
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only when OpenMP >= 5.0 and S carries at least one
      // 'lastprivate(conditional: ...)' clause.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Record every conditional-lastprivate decl together with a unique global
  // name; the destructor pops this entry.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and owning function for later
  // update emission.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11331
LastprivateConditionalRAII(CodeGenFunction & CGF,const OMPExecutableDirective & S)11332 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11333 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11334 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11335 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11336 if (CGM.getLangOpts().OpenMP < 50)
11337 return;
11338 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11339 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11340 if (!NeedToAddForLPCsAsDisabled.empty()) {
11341 Action = ActionToDo::DisableLastprivateConditional;
11342 LastprivateConditionalData &Data =
11343 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11344 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11345 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11346 Data.Fn = CGF.CurFn;
11347 Data.Disabled = true;
11348 }
11349 }
11350
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named factory for the two-argument constructor, which pushes a Disabled
  // entry for decls that must not be analyzed inside region S.
  return LastprivateConditionalRAII(CGF, S);
}
11356
~LastprivateConditionalRAII()11357 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11358 if (CGM.getLangOpts().OpenMP < 50)
11359 return;
11360 if (Action == ActionToDo::DisableLastprivateConditional) {
11361 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11362 "Expected list of disabled private vars.");
11363 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11364 }
11365 if (Action == ActionToDo::PushAsLastprivateConditional) {
11366 assert(
11367 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11368 "Expected list of lastprivate conditional vars.");
11369 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11370 }
11371 }
11372
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  // Creates (or reuses) the per-function bookkeeping record for a
  // lastprivate conditional variable:
  //   struct { <VD's type> Val; char Fired; };
  // Resets the Fired flag to 0 and returns the address of the Val field,
  // which serves as the private copy of VD.
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First time VD is seen in this function: build the record type, create
    // a stack temporary for it, and cache all pieces for later lookups.
    // NOTE(review): "lasprivate" looks like a typo; the name only becomes an
    // implicit record identifier, but keep it byte-identical unless a rename
    // is verified against expected-IR tests.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached entry: unpack (type, value field, fired field, base lvalue).
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // priv_a.Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11407
11408 namespace {
11409 /// Checks if the lastprivate conditional variable is referenced in LHS.
11410 class LastprivateConditionalRefChecker final
11411 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11412 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11413 const Expr *FoundE = nullptr;
11414 const Decl *FoundD = nullptr;
11415 StringRef UniqueDeclName;
11416 LValue IVLVal;
11417 llvm::Function *FoundFn = nullptr;
11418 SourceLocation Loc;
11419
11420 public:
VisitDeclRefExpr(const DeclRefExpr * E)11421 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11422 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11423 llvm::reverse(LPM)) {
11424 auto It = D.DeclToUniqueName.find(E->getDecl());
11425 if (It == D.DeclToUniqueName.end())
11426 continue;
11427 if (D.Disabled)
11428 return false;
11429 FoundE = E;
11430 FoundD = E->getDecl()->getCanonicalDecl();
11431 UniqueDeclName = It->second;
11432 IVLVal = D.IVLVal;
11433 FoundFn = D.Fn;
11434 break;
11435 }
11436 return FoundE == E;
11437 }
VisitMemberExpr(const MemberExpr * E)11438 bool VisitMemberExpr(const MemberExpr *E) {
11439 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11440 return false;
11441 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11442 llvm::reverse(LPM)) {
11443 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11444 if (It == D.DeclToUniqueName.end())
11445 continue;
11446 if (D.Disabled)
11447 return false;
11448 FoundE = E;
11449 FoundD = E->getMemberDecl()->getCanonicalDecl();
11450 UniqueDeclName = It->second;
11451 IVLVal = D.IVLVal;
11452 FoundFn = D.Fn;
11453 break;
11454 }
11455 return FoundE == E;
11456 }
VisitStmt(const Stmt * S)11457 bool VisitStmt(const Stmt *S) {
11458 for (const Stmt *Child : S->children()) {
11459 if (!Child)
11460 continue;
11461 if (const auto *E = dyn_cast<Expr>(Child))
11462 if (!E->isGLValue())
11463 continue;
11464 if (Visit(Child))
11465 return true;
11466 }
11467 return false;
11468 }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)11469 explicit LastprivateConditionalRefChecker(
11470 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11471 : LPM(LPM) {}
11472 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const11473 getFoundData() const {
11474 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11475 }
11476 };
11477 } // namespace
11478
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Emits, inside a critical region named after the variable:
  //   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
  // where last_iv/last_a are internal globals shared by all threads.
  //
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11565
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // If LHS references a tracked lastprivate conditional variable, emit the
  // corresponding update: either the critical-region compare-and-store (when
  // the variable belongs to the current function) or an atomic store to its
  // Fired flag (when it belongs to an enclosing function's region).
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    // Reinterpret the private copy's address as the bookkeeping struct
    // created by emitLastprivateConditionalInit (Val is its first field).
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Volatile atomic store: other threads inspect Fired after the region.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11608
checkAndEmitSharedLastprivateConditional(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> & IgnoredDecls)11609 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11610 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11611 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11612 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11613 return;
11614 auto Range = llvm::reverse(LastprivateConditionalStack);
11615 auto It = llvm::find_if(
11616 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11617 if (It == Range.end() || It->Fn != CGF.CurFn)
11618 return;
11619 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11620 assert(LPCI != LastprivateConditionalToTypes.end() &&
11621 "Lastprivates must be registered already.");
11622 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11623 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11624 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11625 for (const auto &Pair : It->DeclToUniqueName) {
11626 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11627 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11628 continue;
11629 auto I = LPCI->getSecond().find(Pair.first);
11630 assert(I != LPCI->getSecond().end() &&
11631 "Lastprivate must be rehistered already.");
11632 // bool Cmp = priv_a.Fired != 0;
11633 LValue BaseLVal = std::get<3>(I->getSecond());
11634 LValue FiredLVal =
11635 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11636 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11637 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11638 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11639 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11640 // if (Cmp) {
11641 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11642 CGF.EmitBlock(ThenBB);
11643 Address Addr = CGF.GetAddrOfLocalVar(VD);
11644 LValue LVal;
11645 if (VD->getType()->isReferenceType())
11646 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11647 AlignmentSource::Decl);
11648 else
11649 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11650 AlignmentSource::Decl);
11651 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11652 D.getBeginLoc());
11653 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11654 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11655 // }
11656 }
11657 }
11658
emitLastprivateConditionalFinalUpdate(CodeGenFunction & CGF,LValue PrivLVal,const VarDecl * VD,SourceLocation Loc)11659 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11660 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11661 SourceLocation Loc) {
11662 if (CGF.getLangOpts().OpenMP < 50)
11663 return;
11664 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11665 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11666 "Unknown lastprivate conditional variable.");
11667 StringRef UniqueName = It->second;
11668 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11669 // The variable was not updated in the region - exit.
11670 if (!GV)
11671 return;
11672 LValue LPLVal = CGF.MakeAddrLValue(
11673 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11674 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11675 CGF.EmitStoreOfScalar(Res, PrivLVal);
11676 }
11677
// SIMD-only mode (-fopenmp-simd) emits no runtime calls or outlined regions.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11683
// SIMD-only mode: 'teams' outlining is never reached.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11689
// SIMD-only mode: task outlining is never reached.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11697
// SIMD-only mode: parallel region calls are never reached.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11705
// SIMD-only mode: 'critical' regions are never reached.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11712
// SIMD-only mode: 'master' regions are never reached.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11718
// SIMD-only mode: 'taskyield' is never reached.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11723
// SIMD-only mode: 'taskgroup' regions are never reached.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11729
// SIMD-only mode: 'single' regions are never reached.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11737
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)11738 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11739 const RegionCodeGenTy &OrderedOpGen,
11740 SourceLocation Loc,
11741 bool IsThreads) {
11742 llvm_unreachable("Not supported in SIMD-only mode");
11743 }
11744
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind,bool EmitChecks,bool ForceSimpleCall)11745 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11746 SourceLocation Loc,
11747 OpenMPDirectiveKind Kind,
11748 bool EmitChecks,
11749 bool ForceSimpleCall) {
11750 llvm_unreachable("Not supported in SIMD-only mode");
11751 }
11752
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,const DispatchRTInput & DispatchValues)11753 void CGOpenMPSIMDRuntime::emitForDispatchInit(
11754 CodeGenFunction &CGF, SourceLocation Loc,
11755 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11756 bool Ordered, const DispatchRTInput &DispatchValues) {
11757 llvm_unreachable("Not supported in SIMD-only mode");
11758 }
11759
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)11760 void CGOpenMPSIMDRuntime::emitForStaticInit(
11761 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11762 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11763 llvm_unreachable("Not supported in SIMD-only mode");
11764 }
11765
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const StaticRTInput & Values)11766 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11767 CodeGenFunction &CGF, SourceLocation Loc,
11768 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11769 llvm_unreachable("Not supported in SIMD-only mode");
11770 }
11771
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)11772 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11773 SourceLocation Loc,
11774 unsigned IVSize,
11775 bool IVSigned) {
11776 llvm_unreachable("Not supported in SIMD-only mode");
11777 }
11778
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)11779 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
11780 SourceLocation Loc,
11781 OpenMPDirectiveKind DKind) {
11782 llvm_unreachable("Not supported in SIMD-only mode");
11783 }
11784
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)11785 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
11786 SourceLocation Loc,
11787 unsigned IVSize, bool IVSigned,
11788 Address IL, Address LB,
11789 Address UB, Address ST) {
11790 llvm_unreachable("Not supported in SIMD-only mode");
11791 }
11792
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)11793 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11794 llvm::Value *NumThreads,
11795 SourceLocation Loc) {
11796 llvm_unreachable("Not supported in SIMD-only mode");
11797 }
11798
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)11799 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
11800 ProcBindKind ProcBind,
11801 SourceLocation Loc) {
11802 llvm_unreachable("Not supported in SIMD-only mode");
11803 }
11804
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)11805 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
11806 const VarDecl *VD,
11807 Address VDAddr,
11808 SourceLocation Loc) {
11809 llvm_unreachable("Not supported in SIMD-only mode");
11810 }
11811
emitThreadPrivateVarDefinition(const VarDecl * VD,Address VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)11812 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
11813 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11814 CodeGenFunction *CGF) {
11815 llvm_unreachable("Not supported in SIMD-only mode");
11816 }
11817
getAddrOfArtificialThreadPrivate(CodeGenFunction & CGF,QualType VarType,StringRef Name)11818 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
11819 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11820 llvm_unreachable("Not supported in SIMD-only mode");
11821 }
11822
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * > Vars,SourceLocation Loc,llvm::AtomicOrdering AO)11823 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
11824 ArrayRef<const Expr *> Vars,
11825 SourceLocation Loc,
11826 llvm::AtomicOrdering AO) {
11827 llvm_unreachable("Not supported in SIMD-only mode");
11828 }
11829
emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)11830 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
11831 const OMPExecutableDirective &D,
11832 llvm::Function *TaskFunction,
11833 QualType SharedsTy, Address Shareds,
11834 const Expr *IfCond,
11835 const OMPTaskDataTy &Data) {
11836 llvm_unreachable("Not supported in SIMD-only mode");
11837 }
11838
emitTaskLoopCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPLoopDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)11839 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
11840 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11841 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11842 const Expr *IfCond, const OMPTaskDataTy &Data) {
11843 llvm_unreachable("Not supported in SIMD-only mode");
11844 }
11845
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,ReductionOptionsTy Options)11846 void CGOpenMPSIMDRuntime::emitReduction(
11847 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
11848 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
11849 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11850 assert(Options.SimpleReduction && "Only simple reduction is expected.");
11851 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11852 ReductionOps, Options);
11853 }
11854
emitTaskReductionInit(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,const OMPTaskDataTy & Data)11855 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
11856 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
11857 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11858 llvm_unreachable("Not supported in SIMD-only mode");
11859 }
11860
emitTaskReductionFini(CodeGenFunction & CGF,SourceLocation Loc,bool IsWorksharingReduction)11861 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
11862 SourceLocation Loc,
11863 bool IsWorksharingReduction) {
11864 llvm_unreachable("Not supported in SIMD-only mode");
11865 }
11866
emitTaskReductionFixups(CodeGenFunction & CGF,SourceLocation Loc,ReductionCodeGen & RCG,unsigned N)11867 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
11868 SourceLocation Loc,
11869 ReductionCodeGen &RCG,
11870 unsigned N) {
11871 llvm_unreachable("Not supported in SIMD-only mode");
11872 }
11873
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)11874 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
11875 SourceLocation Loc,
11876 llvm::Value *ReductionsPtr,
11877 LValue SharedLVal) {
11878 llvm_unreachable("Not supported in SIMD-only mode");
11879 }
11880
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc)11881 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
11882 SourceLocation Loc) {
11883 llvm_unreachable("Not supported in SIMD-only mode");
11884 }
11885
emitCancellationPointCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind CancelRegion)11886 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
11887 CodeGenFunction &CGF, SourceLocation Loc,
11888 OpenMPDirectiveKind CancelRegion) {
11889 llvm_unreachable("Not supported in SIMD-only mode");
11890 }
11891
emitCancelCall(CodeGenFunction & CGF,SourceLocation Loc,const Expr * IfCond,OpenMPDirectiveKind CancelRegion)11892 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
11893 SourceLocation Loc, const Expr *IfCond,
11894 OpenMPDirectiveKind CancelRegion) {
11895 llvm_unreachable("Not supported in SIMD-only mode");
11896 }
11897
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)11898 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
11899 const OMPExecutableDirective &D, StringRef ParentName,
11900 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11901 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11902 llvm_unreachable("Not supported in SIMD-only mode");
11903 }
11904
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Function * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)11905 void CGOpenMPSIMDRuntime::emitTargetCall(
11906 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11907 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11908 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11909 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11910 const OMPLoopDirective &D)>
11911 SizeEmitter) {
11912 llvm_unreachable("Not supported in SIMD-only mode");
11913 }
11914
emitTargetFunctions(GlobalDecl GD)11915 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
11916 llvm_unreachable("Not supported in SIMD-only mode");
11917 }
11918
emitTargetGlobalVariable(GlobalDecl GD)11919 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11920 llvm_unreachable("Not supported in SIMD-only mode");
11921 }
11922
emitTargetGlobal(GlobalDecl GD)11923 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
11924 return false;
11925 }
11926
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)11927 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
11928 const OMPExecutableDirective &D,
11929 SourceLocation Loc,
11930 llvm::Function *OutlinedFn,
11931 ArrayRef<llvm::Value *> CapturedVars) {
11932 llvm_unreachable("Not supported in SIMD-only mode");
11933 }
11934
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)11935 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11936 const Expr *NumTeams,
11937 const Expr *ThreadLimit,
11938 SourceLocation Loc) {
11939 llvm_unreachable("Not supported in SIMD-only mode");
11940 }
11941
emitTargetDataCalls(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device,const RegionCodeGenTy & CodeGen,TargetDataInfo & Info)11942 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11943 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11944 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11945 llvm_unreachable("Not supported in SIMD-only mode");
11946 }
11947
emitTargetDataStandAloneCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device)11948 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11949 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11950 const Expr *Device) {
11951 llvm_unreachable("Not supported in SIMD-only mode");
11952 }
11953
emitDoacrossInit(CodeGenFunction & CGF,const OMPLoopDirective & D,ArrayRef<Expr * > NumIterations)11954 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11955 const OMPLoopDirective &D,
11956 ArrayRef<Expr *> NumIterations) {
11957 llvm_unreachable("Not supported in SIMD-only mode");
11958 }
11959
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)11960 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11961 const OMPDependClause *C) {
11962 llvm_unreachable("Not supported in SIMD-only mode");
11963 }
11964
11965 const VarDecl *
translateParameter(const FieldDecl * FD,const VarDecl * NativeParam) const11966 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
11967 const VarDecl *NativeParam) const {
11968 llvm_unreachable("Not supported in SIMD-only mode");
11969 }
11970
11971 Address
getParameterAddress(CodeGenFunction & CGF,const VarDecl * NativeParam,const VarDecl * TargetParam) const11972 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
11973 const VarDecl *NativeParam,
11974 const VarDecl *TargetParam) const {
11975 llvm_unreachable("Not supported in SIMD-only mode");
11976 }
11977