1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12
13 // XXXAR: TODO fix default address space in this file as well
14 #define getUnqual(arg) get(arg, 0u)
15
16 #include "CGOpenMPRuntime.h"
17 #include "CGCXXABI.h"
18 #include "CGCleanup.h"
19 #include "CGRecordLayout.h"
20 #include "CodeGenFunction.h"
21 #include "clang/AST/Attr.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/AST/OpenMPClause.h"
24 #include "clang/AST/StmtOpenMP.h"
25 #include "clang/AST/StmtVisitor.h"
26 #include "clang/Basic/BitmaskEnum.h"
27 #include "clang/Basic/FileManager.h"
28 #include "clang/Basic/OpenMPKinds.h"
29 #include "clang/Basic/SourceManager.h"
30 #include "clang/CodeGen/ConstantInitBuilder.h"
31 #include "llvm/ADT/ArrayRef.h"
32 #include "llvm/ADT/SetOperations.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/GlobalValue.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49
50 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the task-resume switch for untied tasks. Default is a no-op;
  /// overridden by regions that support untied tasks.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited via an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
112
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function (caller-provided).
  StringRef HelperName;
};
145
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that builds the switch used to resume an untied task at the
  /// correct continuation point after it has been rescheduled. The selector
  /// is the task part id stored through \a PartIDVar.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Pointer parameter holding the current task part id.
    const VarDecl *PartIDVar;
    /// Extra codegen run at each task switching point (e.g. stack cleanup).
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        // Default destination exits the task; case 0 (added below) is the
        // initial entry into the task body.
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Record the next part id so a later re-invocation resumes here.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (= switch case count).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
234
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing outlined region
/// info (if any); otherwise sensible defaults or unreachable are used.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE: this delegates via getOldCSI() (any captured-stmt info), not just
    // via OuterRegionInfo, so non-OpenMP enclosing regions also provide a name.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
317
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name for the target region.
  StringRef HelperName;
};
346
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local to the function; only
      // globals need the privatization below.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // Never matched by the cast machinery: this info is internal to
  // expression emission and must not be discovered via classof.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
409
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on entry and restores the previous
/// CapturedStmtInfo / lambda-capture / block state on exit.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of the enclosing function.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash lambda/block capture state so the inlined region does not see it.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
446
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as OMP_IDENT_BARRIER_IMPL
  /// by design, per kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
475
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Special device ids understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
501
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
542
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
574
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region. Runs the action's Exit() hook as an EH-stack cleanup.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to exit through if emission already terminated the block.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
588
589 } // anonymous namespace
590
/// Invoke the region codegen callback inside a fresh cleanup scope. If a
/// pre/post action is attached, its Exit() is registered as a cleanup so it
/// runs even when the region is left early (e.g. via cancellation branches).
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
601
602 /// Check if the combiner is a call to UDR combiner and if it is so return the
603 /// UDR decl used for reduction.
604 static const OMPDeclareReductionDecl *
getReductionInit(const Expr * ReductionOp)605 getReductionInit(const Expr *ReductionOp) {
606 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
607 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
608 if (const auto *DRE =
609 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
610 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
611 return DRD;
612 return nullptr;
613 }
614
/// Emit initialization of \p Private from \p Original for a reduction with
/// user-defined reduction decl \p DRD. If the UDR has an initializer clause,
/// the initializer expression \p InitOp is evaluated with omp_priv/omp_orig
/// bound to Private/Original; otherwise Private is zero-initialized from a
/// constant null of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Reduction.second is the initializer function for this UDR.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Bind the initializer's omp_priv/omp_orig placeholders to the actual
    // private and original addresses.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: materialize a null constant of the reduction
    // type in a private global and copy it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
666
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
///        UDR initializer \p Init; otherwise emit \p Init as a plain
///        element initializer.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl, or null. When non-null, \p SrcAddr
///        must be valid (the UDR initializer reads the original element).
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
755
/// Emit an lvalue for the shared (original) variable of a reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
759
emitSharedLValueUB(CodeGenFunction & CGF,const Expr * E)760 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
761 const Expr *E) {
762 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
763 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
764 return LValue();
765 }
766
/// Emits initialization of an array-typed private reduction copy of item
/// \p N at \p PrivateAddr, element by element, using either the user-defined
/// reduction initializer from \p DRD or the private variable's own
/// initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when it is provided explicitly, or
  // when the private copy has no default initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
783
ReductionCodeGen(ArrayRef<const Expr * > Shareds,ArrayRef<const Expr * > Origs,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > ReductionOps)784 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
785 ArrayRef<const Expr *> Origs,
786 ArrayRef<const Expr *> Privates,
787 ArrayRef<const Expr *> ReductionOps) {
788 ClausesData.reserve(Shareds.size());
789 SharedAddresses.reserve(Shareds.size());
790 Sizes.reserve(Shareds.size());
791 BaseDecls.reserve(Shareds.size());
792 const auto *IOrig = Origs.begin();
793 const auto *IPriv = Privates.begin();
794 const auto *IRed = ReductionOps.begin();
795 for (const Expr *Ref : Shareds) {
796 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
797 std::advance(IOrig, 1);
798 std::advance(IPriv, 1);
799 std::advance(IRed, 1);
800 }
801 }
802
emitSharedOrigLValue(CodeGenFunction & CGF,unsigned N)803 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
804 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
805 "Number of generated lvalues must be exactly N.");
806 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
807 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
808 SharedAddresses.emplace_back(First, Second);
809 if (ClausesData[N].Shared == ClausesData[N].Ref) {
810 OrigAddresses.emplace_back(First, Second);
811 } else {
812 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
813 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
814 OrigAddresses.emplace_back(First, Second);
815 }
816 }
817
/// Computes and records the size of reduction item \p N. For constant-sized
/// types only the size in chars is stored (element count stays null). For
/// variably modified types both the size in chars and the element count are
/// computed, and the VLA size expression is bound so the private type can be
/// emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: no runtime element count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, then scale by sizeof(element) to get
    // the byte size.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: total byte size is known; derive the element count from it.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so emitting
  // the variably modified type below picks it up.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
854
/// Re-emits the variably modified private type of item \p N using an
/// externally provided element count \p Size (e.g. inside an outlined
/// function where the size was passed in). No-op for constant-sized items,
/// in which case \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size before emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
873
/// Emits initialization of the private reduction copy of item \p N at
/// \p PrivateAddr. Arrays are initialized element-wise; otherwise the
/// user-defined reduction initializer is used when applicable, falling back
/// to \p DefaultInit and finally the private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the memory types the initializers expect.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // NOTE(review): DefaultInit returning false appears to mean it did not
    // emit the initialization itself — confirm against callers.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
904
needCleanups(unsigned N)905 bool ReductionCodeGen::needCleanups(unsigned N) {
906 const auto *PrivateVD =
907 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
908 QualType PrivateType = PrivateVD->getType();
909 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
910 return DTorKind != QualType::DK_none;
911 }
912
emitCleanups(CodeGenFunction & CGF,unsigned N,Address PrivateAddr)913 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
914 Address PrivateAddr) {
915 const auto *PrivateVD =
916 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917 QualType PrivateType = PrivateVD->getType();
918 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919 if (needCleanups(N)) {
920 PrivateAddr = CGF.Builder.CreateElementBitCast(
921 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
922 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
923 }
924 }
925
/// Dereferences pointer/reference levels of \p BaseLV, starting from type
/// \p BaseTy, until the type matches \p ElTy, then returns the resulting
/// lvalue recast to \p ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of pointer or reference indirection.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
945
/// Inverse of loadToBegin: rebuilds the indirection chain from \p BaseTy down
/// to \p ElTy out of stack temporaries, stores \p Addr (cast to the innermost
/// type) at the bottom, and returns the outermost temporary's address. When
/// there is no indirection, returns \p Addr cast to \p BaseLVType with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; link each new level into the
    // previous one so loads walk down to the stored address.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
973
getBaseDecl(const Expr * Ref,const DeclRefExpr * & DE)974 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
975 const VarDecl *OrigVD = nullptr;
976 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
977 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
978 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
979 Base = TempOASE->getBase()->IgnoreParenImpCasts();
980 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981 Base = TempASE->getBase()->IgnoreParenImpCasts();
982 DE = cast<DeclRefExpr>(Base);
983 OrigVD = cast<VarDecl>(DE->getDecl());
984 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
985 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
986 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987 Base = TempASE->getBase()->IgnoreParenImpCasts();
988 DE = cast<DeclRefExpr>(Base);
989 OrigVD = cast<VarDecl>(DE->getDecl());
990 }
991 return OrigVD;
992 }
993
/// Adjusts \p PrivateAddr for a reduction item that is an array section or
/// subscript: offsets the private copy by the distance between the base
/// variable's begin and the shared item's address, so accesses relative to
/// the base land inside the private buffer. Also records the base
/// declaration for item \p N.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Load down to the element type of the shared item so the pointer
    // difference below is computed in elements of that type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the base variable relative to the section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1019
usesReductionInitializer(unsigned N) const1020 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1021 const OMPDeclareReductionDecl *DRD =
1022 getReductionInit(ClausesData[N].ReductionOp);
1023 return DRD && DRD->getInitializer();
1024 }
1025
getThreadIDVariableLValue(CodeGenFunction & CGF)1026 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1027 return CGF.EmitLoadOfPointerLValue(
1028 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1029 getThreadIDVariable()->getType()->castAs<PointerType>());
1030 }
1031
/// Emits the body of an OpenMP region via the stored CodeGen callback,
/// wrapped in a terminate scope so exceptions cannot escape the region.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforce this by running the region body inside a terminate scope: an
  // exception propagating out terminates the program instead of unwinding.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1044
getThreadIDVariableLValue(CodeGenFunction & CGF)1045 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1046 CodeGenFunction &CGF) {
1047 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1048 getThreadIDVariable()->getType(),
1049 AlignmentSource::Decl);
1050 }
1051
addFieldToRecordDecl(ASTContext & C,DeclContext * DC,QualType FieldTy)1052 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1053 QualType FieldTy) {
1054 auto *Field = FieldDecl::Create(
1055 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1056 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1057 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1058 Field->setAccess(AS_public);
1059 DC->addDecl(Field);
1060 return Field;
1061 }
1062
/// Constructs the runtime helper. Builds the 'ident_t' record used by libomp
/// entry points for source-location info, the critical-name array type, and
/// initializes the OpenMPIRBuilder and offload metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  // ident_t: four kmp_int32 fields followed by a void* to the
  // ";file;function;line;column;;" source string.
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name: an array of 8 i32 lock words.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1090
clear()1091 void CGOpenMPRuntime::clear() {
1092 InternalVars.clear();
1093 // Clean non-target variable declarations possibly used only in debug info.
1094 for (const auto &Data : EmittedNonTargetVariables) {
1095 if (!Data.getValue().pointsToAliveValue())
1096 continue;
1097 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098 if (!GV)
1099 continue;
1100 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101 continue;
1102 GV->eraseFromParent();
1103 }
1104 }
1105
getName(ArrayRef<StringRef> Parts) const1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107 SmallString<128> Buffer;
1108 llvm::raw_svector_ostream OS(Buffer);
1109 StringRef Sep = FirstSeparator;
1110 for (StringRef Part : Parts) {
1111 OS << Sep << Part;
1112 Sep = Separator;
1113 }
1114 return std::string(OS.str());
1115 }
1116
/// Emits the combiner function "void .omp_combiner.(Ty *out, Ty *in)" or the
/// initializer ".omp_initializer." for a user-defined reduction. \p In and
/// \p Out are the omp_in/omp_out (or orig/priv) variables of the declaration
/// and are mapped onto the two pointer parameters. When
/// \p CombinerInitializer is null (initializer case with default init), only
/// \p Out's own initializer is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; force-inline them in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Initializer case without an explicit init expression: run the priv
    // variable's own (default) initializer.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173
/// Emits (and caches in UDRMap) the combiner and optional initializer
/// functions for the user-defined reduction \p D. When \p CGF is non-null,
/// \p D is also recorded against the current function in FunctionUDRMap
/// (presumably for per-function bookkeeping — confirm against its users).
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression through; other
    // kinds rely on the priv variable's own initializer inside the emitted
    // function.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1199
1200 std::pair<llvm::Function *, llvm::Function *>
getUserDefinedReduction(const OMPDeclareReductionDecl * D)1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202 auto I = UDRMap.find(D);
1203 if (I != UDRMap.end())
1204 return I->second;
1205 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206 return UDRMap.lookup(D);
1207 }
1208
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for an OMPD_parallel region onto the
  /// OpenMPIRBuilder's finalization stack (no-op when \p OMPBuilder is
  /// null); the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Redirect control flow at IP through clang's cleanup machinery to the
      // cancellation destination of the parallel region.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1254
emitParallelOrTeamsOutlinedFunction(CodeGenModule & CGM,const OMPExecutableDirective & D,const CapturedStmt * CS,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const StringRef OutlinedHelperName,const RegionCodeGenTy & CodeGen)1255 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1256 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1257 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1258 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1259 assert(ThreadIDVar->getType()->isPointerType() &&
1260 "thread id variable must be of type kmp_int32 *");
1261 CodeGenFunction CGF(CGM, true);
1262 bool HasCancel = false;
1263 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1264 HasCancel = OPD->hasCancel();
1265 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1266 HasCancel = OPD->hasCancel();
1267 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1268 HasCancel = OPSD->hasCancel();
1269 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1270 HasCancel = OPFD->hasCancel();
1271 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1272 HasCancel = OPFD->hasCancel();
1273 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1274 HasCancel = OPFD->hasCancel();
1275 else if (const auto *OPFD =
1276 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1277 HasCancel = OPFD->hasCancel();
1278 else if (const auto *OPFD =
1279 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1280 HasCancel = OPFD->hasCancel();
1281
1282 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1283 // parallel region to make cancellation barriers work properly.
1284 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1285 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1286 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1287 HasCancel, OutlinedHelperName);
1288 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1289 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1290 }
1291
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1292 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1293 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1294 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1295 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1296 return emitParallelOrTeamsOutlinedFunction(
1297 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1298 }
1299
emitTeamsOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1300 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1301 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1302 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1303 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1304 return emitParallelOrTeamsOutlinedFunction(
1305 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1306 }
1307
/// Outlines a task (or taskloop) region into a function. For untied tasks,
/// \p CodeGen is wrapped in an action that re-enqueues the task via
/// __kmpc_omp_task, and \p NumberOfParts receives the number of generated
/// task parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emitted at untied-task switch points: __kmpc_omp_task(loc, tid, task_t)
  // with the current task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1354
buildStructValue(ConstantStructBuilder & Fields,CodeGenModule & CGM,const RecordDecl * RD,const CGRecordLayout & RL,ArrayRef<llvm::Constant * > Data)1355 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1356 const RecordDecl *RD, const CGRecordLayout &RL,
1357 ArrayRef<llvm::Constant *> Data) {
1358 llvm::StructType *StructTy = RL.getLLVMType();
1359 unsigned PrevIdx = 0;
1360 ConstantInitBuilder CIBuilder(CGM);
1361 auto DI = Data.begin();
1362 for (const FieldDecl *FD : RD->fields()) {
1363 unsigned Idx = RL.getLLVMFieldNo(FD);
1364 // Fill the alignment.
1365 for (unsigned I = PrevIdx; I < Idx; ++I)
1366 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1367 PrevIdx = Idx + 1;
1368 Fields.add(*DI);
1369 ++DI;
1370 }
1371 }
1372
/// Creates a global variable holding a constant struct of type \p Ty filled
/// with \p Data (padding handled by buildStructValue). Trailing arguments
/// \p Args are forwarded to ConstantStructBuilder::finishAndCreateGlobal
/// (e.g. the linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1387
/// Appends a constant struct of type \p Ty filled with \p Data to the
/// aggregate being built by \p Parent (another constant aggregate builder).
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1399
/// Returns (creating and caching on first use) the global default ident_t
/// object for the given \p Flags; used when no debug info or valid source
/// location is available.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  // The cache key combines both flag words so each combination gets its own
  // global.
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // ident_t fields: reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1432
/// Creates the "service" insertion point for the current function: a dead
/// marker instruction used as a stable position for emitting location and
/// thread-id setup code. Placed either in the current insert block or right
/// after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A no-op bitcast of undef serves as a removable marker instruction.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1448
clearLocThreadIdInsertPt(CodeGenFunction & CGF)1449 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1450 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1451 if (Elem.second.ServiceInsertPt) {
1452 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1453 Elem.second.ServiceInsertPt = nullptr;
1454 Ptr->eraseFromParent();
1455 }
1456 }
1457
/// Emits the ident_t* "location" argument expected by libomp entry points,
/// describing source location \p Loc with the directive flags \p Flags.
/// Without debug info this is a shared constant; with debug info a
/// per-function ident_t temporary is kept and its psource field updated.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Every location emitted here is a "kmpc" location.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Reuse the per-function ident_t temporary if one was already created.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary by copying the default ident_t over it at the
    // service insertion point (after the allocas), so every later use in the
    // function sees an initialized struct.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Lazily build (and cache, keyed on the raw source location) the
  // ";<File>;<Function>;<Line>;<Column>;;" string for the psource field.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1518
/// Returns the kmp_int32 global thread id for the current function, cached
/// in OpenMPLocThreadIDMap so it is materialized at most once per function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Reuse the argument only when it is safe w.r.t. C++ exceptions:
      // either no landing pad is required, or the load happens in (or the
      // address originates from) the entry block or the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point so the id is available to
  // the whole function, then restore the builder position.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1576
functionFinished(CodeGenFunction & CGF)1577 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1578 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1579 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1580 clearLocThreadIdInsertPt(CGF);
1581 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1582 }
1583 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1584 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1585 UDRMap.erase(D);
1586 FunctionUDRMap.erase(CGF.CurFn);
1587 }
1588 auto I = FunctionUDMMap.find(CGF.CurFn);
1589 if (I != FunctionUDMMap.end()) {
1590 for(const auto *D : I->second)
1591 UDMMap.erase(D);
1592 FunctionUDMMap.erase(I);
1593 }
1594 LastprivateConditionalToTypes.erase(CGF.CurFn);
1595 }
1596
/// Returns "ident_t *", the source-location-info pointer type passed as the
/// first argument to most libomp entry points.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}
1600
getKmpc_MicroPointerTy()1601 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1602 if (!Kmpc_MicroTy) {
1603 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1604 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1605 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1606 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1607 }
1608 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1609 }
1610
1611 llvm::FunctionCallee
createForStaticInitFunction(unsigned IVSize,bool IVSigned)1612 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1613 assert((IVSize == 32 || IVSize == 64) &&
1614 "IV size is not compatible with the omp runtime");
1615 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1616 : "__kmpc_for_static_init_4u")
1617 : (IVSigned ? "__kmpc_for_static_init_8"
1618 : "__kmpc_for_static_init_8u");
1619 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621 llvm::Type *TypeParams[] = {
1622 getIdentTyPointerTy(), // loc
1623 CGM.Int32Ty, // tid
1624 CGM.Int32Ty, // schedtype
1625 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1626 PtrTy, // p_lower
1627 PtrTy, // p_upper
1628 PtrTy, // p_stride
1629 ITy, // incr
1630 ITy // chunk
1631 };
1632 auto *FnTy =
1633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1634 return CGM.CreateRuntimeFunction(FnTy, Name);
1635 }
1636
1637 llvm::FunctionCallee
createDispatchInitFunction(unsigned IVSize,bool IVSigned)1638 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1639 assert((IVSize == 32 || IVSize == 64) &&
1640 "IV size is not compatible with the omp runtime");
1641 StringRef Name =
1642 IVSize == 32
1643 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1644 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1645 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1646 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1647 CGM.Int32Ty, // tid
1648 CGM.Int32Ty, // schedtype
1649 ITy, // lower
1650 ITy, // upper
1651 ITy, // stride
1652 ITy // chunk
1653 };
1654 auto *FnTy =
1655 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1656 return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658
1659 llvm::FunctionCallee
createDispatchFiniFunction(unsigned IVSize,bool IVSigned)1660 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1661 assert((IVSize == 32 || IVSize == 64) &&
1662 "IV size is not compatible with the omp runtime");
1663 StringRef Name =
1664 IVSize == 32
1665 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1666 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1667 llvm::Type *TypeParams[] = {
1668 getIdentTyPointerTy(), // loc
1669 CGM.Int32Ty, // tid
1670 };
1671 auto *FnTy =
1672 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1673 return CGM.CreateRuntimeFunction(FnTy, Name);
1674 }
1675
1676 llvm::FunctionCallee
createDispatchNextFunction(unsigned IVSize,bool IVSigned)1677 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1678 assert((IVSize == 32 || IVSize == 64) &&
1679 "IV size is not compatible with the omp runtime");
1680 StringRef Name =
1681 IVSize == 32
1682 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1683 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1684 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1685 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1686 llvm::Type *TypeParams[] = {
1687 getIdentTyPointerTy(), // loc
1688 CGM.Int32Ty, // tid
1689 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1690 PtrTy, // p_lower
1691 PtrTy, // p_upper
1692 PtrTy // p_stride
1693 };
1694 auto *FnTy =
1695 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1696 return CGM.CreateRuntimeFunction(FnTy, Name);
1697 }
1698
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
///
/// \param[out] DeviceID device component of the file's filesystem unique ID.
/// \param[out] FileID file component of the file's filesystem unique ID.
/// \param[out] LineNum presumed line number of \p Loc.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Use the filesystem's unique ID so the entry identity is stable across
  // different spellings of the same path. Failure to stat the file is
  // diagnosed but leaves ID default-constructed.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1724
/// For a "declare target" variable that must be accessed indirectly (mapped
/// with the 'link' clause, or 'to' under unified shared memory), return the
/// address of its generated "_decl_tgt_ref_ptr" reference pointer; otherwise
/// return an invalid Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // No device runtime support is generated in simd-only mode.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the reference-pointer name: <mangled-name>[_<fileid>]
    // "_decl_tgt_ref_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Internal-linkage variables get a file-unique suffix since their
      // mangled names may collide across translation units.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Lazily create the pointer-to-VD global on first use.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, point it at the variable; on the device it is left for
      // the runtime/registration machinery to fill in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1763
1764 llvm::Constant *
getOrCreateThreadPrivateCache(const VarDecl * VD)1765 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1766 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1767 !CGM.getContext().getTargetInfo().isTLSSupported());
1768 // Lookup the entry, lazily creating it if necessary.
1769 std::string Suffix = getName({"cache", ""});
1770 return getOrCreateInternalVariable(
1771 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1772 }
1773
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1774 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1775 const VarDecl *VD,
1776 Address VDAddr,
1777 SourceLocation Loc) {
1778 if (CGM.getLangOpts().OpenMPUseTLS &&
1779 CGM.getContext().getTargetInfo().isTLSSupported())
1780 return VDAddr;
1781
1782 llvm::Type *VarTy = VDAddr.getElementType();
1783 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1784 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1785 CGM.Int8PtrTy),
1786 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1787 getOrCreateThreadPrivateCache(VD)};
1788 return Address(CGF.EmitRuntimeCall(
1789 OMPBuilder.getOrCreateRuntimeFunction(
1790 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1791 Args),
1792 VDAddr.getAlignment());
1793 }
1794
emitThreadPrivateVarInit(CodeGenFunction & CGF,Address VDAddr,llvm::Value * Ctor,llvm::Value * CopyCtor,llvm::Value * Dtor,SourceLocation Loc)1795 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1796 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1797 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1798 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1799 // library.
1800 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1801 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1802 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1803 OMPLoc);
1804 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1805 // to register constructor/destructor for variable.
1806 llvm::Value *Args[] = {
1807 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1808 Ctor, CopyCtor, Dtor};
1809 CGF.EmitRuntimeCall(
1810 OMPBuilder.getOrCreateRuntimeFunction(
1811 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1812 Args);
1813 }
1814
/// Emits, for a threadprivate variable definition \p VD, the constructor and
/// destructor helper functions and the registration call. Returns the
/// synthesized "__omp_threadprivate_init_" function when no CodeGenFunction
/// is supplied (so the caller can schedule it as a global initializer), and
/// nullptr otherwise (TLS in use, already emitted, or no init needed).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is emitted here.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit only once per mangled name across the module.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor helper receives the destination (this thread's copy) as a
      // void* parameter and returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run VD's initializer into the destination.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload and return the destination pointer (the runtime's contract).
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor helper receives this thread's copy as a void* parameter.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo(DefaultAS);
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor hooks are passed as typed null pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo(DefaultAS);
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo(DefaultAS);
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a standalone init function that does
      // the registration and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1935
/// Emits, for a "declare target" variable definition \p Addr, the offloading
/// ctor/dtor entries ("<prefix>_ctor"/"<prefix>_dtor") registered with the
/// offload-entries table. Returns true when compiling for the device (the
/// caller then skips the normal host-side emission path).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when there is no offloading at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables and 'to' under unified shared memory are accessed via a
  // reference pointer instead; no ctor/dtor entries here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit only once per mangled name across the module.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even with no visible IR uses.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only an identifying placeholder global is needed.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even with no visible IR uses.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host only an identifying placeholder global is needed.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2050
/// Creates (or finds) a compiler-generated thread-private variable named
/// \p Name of type \p VarType and returns the current thread's address for
/// it, using native TLS when available and the runtime cache otherwise.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Prefer native TLS when the target supports it and it is enabled.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise fall back to the runtime:
  //   __kmpc_threadprivate_cached(loc, gtid, &var, size, &cache)
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned i8* back to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2081
emitIfClause(CodeGenFunction & CGF,const Expr * Cond,const RegionCodeGenTy & ThenGen,const RegionCodeGenTy & ElseGen)2082 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2083 const RegionCodeGenTy &ThenGen,
2084 const RegionCodeGenTy &ElseGen) {
2085 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2086
2087 // If the condition constant folds and can be elided, try to avoid emitting
2088 // the condition and the dead arm of the if/else.
2089 bool CondConstant;
2090 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2091 if (CondConstant)
2092 ThenGen(CGF);
2093 else
2094 ElseGen(CGF);
2095 return;
2096 }
2097
2098 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2099 // emit the conditional branch.
2100 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2101 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2102 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2103 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2104
2105 // Emit the 'then' code.
2106 CGF.EmitBlock(ThenBlock);
2107 ThenGen(CGF);
2108 CGF.EmitBranch(ContBlock);
2109 // Emit the 'else' code if present.
2110 // There is no need to emit line number for unconditional branch.
2111 (void)ApplyDebugLocation::CreateEmpty(CGF);
2112 CGF.EmitBlock(ElseBlock);
2113 ElseGen(CGF);
2114 // There is no need to emit line number for unconditional branch.
2115 (void)ApplyDebugLocation::CreateEmpty(CGF);
2116 CGF.EmitBranch(ContBlock);
2117 // Emit the continuation block for code after the if.
2118 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2119 }
2120
/// Emits the call sequence for a '#pragma omp parallel' region: either
/// __kmpc_fork_call with \p OutlinedFn, or — when \p IfCond evaluates false —
/// a serialized execution via __kmpc_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause, pick the path at runtime (or at compile time if the
  // condition folds); otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2182
2183 // If we're inside an (outlined) parallel region, use the region info's
2184 // thread-ID variable (it is passed as the first argument of the outlined
2185 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2186 // region but in a regular serial code region, get the thread ID by calling
2187 // kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2188 // temporary and return the address of that temp.
emitThreadIDAddress(CodeGenFunction & CGF,SourceLocation Loc)2189 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2190 SourceLocation Loc) {
2191 if (auto *OMPRegionInfo =
2192 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2193 if (OMPRegionInfo->getThreadIDVariable())
2194 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2195
2196 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2197 QualType Int32Ty =
2198 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2199 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2200 CGF.EmitStoreOfScalar(ThreadID,
2201 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2202
2203 return ThreadIDTemp;
2204 }
2205
getOrCreateInternalVariable(llvm::Type * Ty,const llvm::Twine & Name,unsigned AddressSpace)2206 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2207 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2208 SmallString<256> Buffer;
2209 llvm::raw_svector_ostream Out(Buffer);
2210 Out << Name;
2211 StringRef RuntimeName = Out.str();
2212 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2213 if (Elem.second) {
2214 assert(Elem.second->getType()->getPointerElementType() == Ty &&
2215 "OMP internal variable has different type than requested");
2216 return &*Elem.second;
2217 }
2218
2219 return Elem.second = new llvm::GlobalVariable(
2220 CGM.getModule(), Ty, /*IsConstant*/ false,
2221 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2222 Elem.first(), /*InsertBefore=*/nullptr,
2223 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2224 }
2225
getCriticalRegionLock(StringRef CriticalName)2226 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2227 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2228 std::string Name = getName({Prefix, "var"});
2229 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2230 }
2231
2232 namespace {
2233 /// Common pre(post)-action for different OpenMP constructs.
2234 class CommonActionTy final : public PrePostActionTy {
2235 llvm::FunctionCallee EnterCallee;
2236 ArrayRef<llvm::Value *> EnterArgs;
2237 llvm::FunctionCallee ExitCallee;
2238 ArrayRef<llvm::Value *> ExitArgs;
2239 bool Conditional;
2240 llvm::BasicBlock *ContBlock = nullptr;
2241
2242 public:
CommonActionTy(llvm::FunctionCallee EnterCallee,ArrayRef<llvm::Value * > EnterArgs,llvm::FunctionCallee ExitCallee,ArrayRef<llvm::Value * > ExitArgs,bool Conditional=false)2243 CommonActionTy(llvm::FunctionCallee EnterCallee,
2244 ArrayRef<llvm::Value *> EnterArgs,
2245 llvm::FunctionCallee ExitCallee,
2246 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2247 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2248 ExitArgs(ExitArgs), Conditional(Conditional) {}
Enter(CodeGenFunction & CGF)2249 void Enter(CodeGenFunction &CGF) override {
2250 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2251 if (Conditional) {
2252 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2253 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2254 ContBlock = CGF.createBasicBlock("omp_if.end");
2255 // Generate the branch (If-stmt)
2256 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2257 CGF.EmitBlock(ThenBlock);
2258 }
2259 }
Done(CodeGenFunction & CGF)2260 void Done(CodeGenFunction &CGF) {
2261 // Emit the rest of blocks/branches
2262 CGF.EmitBranch(ContBlock);
2263 CGF.EmitBlock(ContBlock, true);
2264 }
Exit(CodeGenFunction & CGF)2265 void Exit(CodeGenFunction &CGF) override {
2266 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2267 }
2268 };
2269 } // anonymous namespace
2270
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)2271 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2272 StringRef CriticalName,
2273 const RegionCodeGenTy &CriticalOpGen,
2274 SourceLocation Loc, const Expr *Hint) {
2275 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2276 // CriticalOpGen();
2277 // __kmpc_end_critical(ident_t *, gtid, Lock);
2278 // Prepare arguments and build a call to __kmpc_critical
2279 if (!CGF.HaveInsertPoint())
2280 return;
2281 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2282 getCriticalRegionLock(CriticalName)};
2283 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2284 std::end(Args));
2285 if (Hint) {
2286 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2287 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2288 }
2289 CommonActionTy Action(
2290 OMPBuilder.getOrCreateRuntimeFunction(
2291 CGM.getModule(),
2292 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2293 EnterArgs,
2294 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2295 OMPRTL___kmpc_end_critical),
2296 Args);
2297 CriticalOpGen.setAction(Action);
2298 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2299 }
2300
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)2301 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2302 const RegionCodeGenTy &MasterOpGen,
2303 SourceLocation Loc) {
2304 if (!CGF.HaveInsertPoint())
2305 return;
2306 // if(__kmpc_master(ident_t *, gtid)) {
2307 // MasterOpGen();
2308 // __kmpc_end_master(ident_t *, gtid);
2309 // }
2310 // Prepare arguments and build a call to __kmpc_master
2311 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2312 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2313 CGM.getModule(), OMPRTL___kmpc_master),
2314 Args,
2315 OMPBuilder.getOrCreateRuntimeFunction(
2316 CGM.getModule(), OMPRTL___kmpc_end_master),
2317 Args,
2318 /*Conditional=*/true);
2319 MasterOpGen.setAction(Action);
2320 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2321 Action.Done(CGF);
2322 }
2323
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)2324 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2325 SourceLocation Loc) {
2326 if (!CGF.HaveInsertPoint())
2327 return;
2328 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2329 OMPBuilder.CreateTaskyield(CGF.Builder);
2330 } else {
2331 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2332 llvm::Value *Args[] = {
2333 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2334 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2335 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2336 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2337 Args);
2338 }
2339
2340 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2341 Region->emitUntiedSwitch(CGF);
2342 }
2343
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)2344 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2345 const RegionCodeGenTy &TaskgroupOpGen,
2346 SourceLocation Loc) {
2347 if (!CGF.HaveInsertPoint())
2348 return;
2349 // __kmpc_taskgroup(ident_t *, gtid);
2350 // TaskgroupOpGen();
2351 // __kmpc_end_taskgroup(ident_t *, gtid);
2352 // Prepare arguments and build a call to __kmpc_taskgroup
2353 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2354 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2355 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2356 Args,
2357 OMPBuilder.getOrCreateRuntimeFunction(
2358 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2359 Args);
2360 TaskgroupOpGen.setAction(Action);
2361 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2362 }
2363
2364 /// Given an array of pointers to variables, project the address of a
2365 /// given variable.
emitAddrOfVarFromArray(CodeGenFunction & CGF,Address Array,unsigned Index,const VarDecl * Var)2366 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2367 unsigned Index, const VarDecl *Var) {
2368 // Pull out the pointer to the variable.
2369 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2370 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2371
2372 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2373 Addr = CGF.Builder.CreateElementBitCast(
2374 Addr, CGF.ConvertTypeForMem(Var->getType()));
2375 return Addr;
2376 }
2377
emitCopyprivateCopyFunction(CodeGenModule & CGM,llvm::Type * ArgsType,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps,SourceLocation Loc)2378 static llvm::Value *emitCopyprivateCopyFunction(
2379 CodeGenModule &CGM, llvm::Type *ArgsType,
2380 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2381 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2382 SourceLocation Loc) {
2383 ASTContext &C = CGM.getContext();
2384 // void copy_func(void *LHSArg, void *RHSArg);
2385 FunctionArgList Args;
2386 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2387 ImplicitParamDecl::Other);
2388 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2389 ImplicitParamDecl::Other);
2390 Args.push_back(&LHSArg);
2391 Args.push_back(&RHSArg);
2392 const auto &CGFI =
2393 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2394 std::string Name =
2395 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2396 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2397 llvm::GlobalValue::InternalLinkage, Name,
2398 &CGM.getModule());
2399 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2400 Fn->setDoesNotRecurse();
2401 CodeGenFunction CGF(CGM);
2402 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2403 // Dest = (void*[n])(LHSArg);
2404 // Src = (void*[n])(RHSArg);
2405 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2406 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2407 ArgsType), CGF.getPointerAlign());
2408 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2410 ArgsType), CGF.getPointerAlign());
2411 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2412 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2413 // ...
2414 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2415 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2416 const auto *DestVar =
2417 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2418 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2419
2420 const auto *SrcVar =
2421 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2422 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2423
2424 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2425 QualType Type = VD->getType();
2426 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2427 }
2428 CGF.FinishFunction();
2429 return Fn;
2430 }
2431
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > DstExprs,ArrayRef<const Expr * > AssignmentOps)2432 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2433 const RegionCodeGenTy &SingleOpGen,
2434 SourceLocation Loc,
2435 ArrayRef<const Expr *> CopyprivateVars,
2436 ArrayRef<const Expr *> SrcExprs,
2437 ArrayRef<const Expr *> DstExprs,
2438 ArrayRef<const Expr *> AssignmentOps) {
2439 if (!CGF.HaveInsertPoint())
2440 return;
2441 assert(CopyprivateVars.size() == SrcExprs.size() &&
2442 CopyprivateVars.size() == DstExprs.size() &&
2443 CopyprivateVars.size() == AssignmentOps.size());
2444 ASTContext &C = CGM.getContext();
2445 // int32 did_it = 0;
2446 // if(__kmpc_single(ident_t *, gtid)) {
2447 // SingleOpGen();
2448 // __kmpc_end_single(ident_t *, gtid);
2449 // did_it = 1;
2450 // }
2451 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2452 // <copy_func>, did_it);
2453
2454 Address DidIt = Address::invalid();
2455 if (!CopyprivateVars.empty()) {
2456 // int32 did_it = 0;
2457 QualType KmpInt32Ty =
2458 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2459 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2460 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2461 }
2462 // Prepare arguments and build a call to __kmpc_single
2463 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2464 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2465 CGM.getModule(), OMPRTL___kmpc_single),
2466 Args,
2467 OMPBuilder.getOrCreateRuntimeFunction(
2468 CGM.getModule(), OMPRTL___kmpc_end_single),
2469 Args,
2470 /*Conditional=*/true);
2471 SingleOpGen.setAction(Action);
2472 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2473 if (DidIt.isValid()) {
2474 // did_it = 1;
2475 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2476 }
2477 Action.Done(CGF);
2478 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2479 // <copy_func>, did_it);
2480 if (DidIt.isValid()) {
2481 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2482 QualType CopyprivateArrayTy = C.getConstantArrayType(
2483 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2484 /*IndexTypeQuals=*/0);
2485 // Create a list of all private variables for copyprivate.
2486 Address CopyprivateList =
2487 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2488 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2489 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2490 CGF.Builder.CreateStore(
2491 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2492 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2493 CGF.VoidPtrTy),
2494 Elem);
2495 }
2496 // Build function that copies private values from single region to all other
2497 // threads in the corresponding parallel region.
2498 unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
2499 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2500 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(DefaultAS),
2501 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2502 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2503 Address CL =
2504 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2505 CGF.VoidPtrTy);
2506 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2507 llvm::Value *Args[] = {
2508 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2509 getThreadID(CGF, Loc), // i32 <gtid>
2510 BufSize, // size_t <buf_size>
2511 CL.getPointer(), // void *<copyprivate list>
2512 CpyFn, // void (*) (void *, void *) <copy_func>
2513 DidItVal // i32 did_it
2514 };
2515 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2516 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2517 Args);
2518 }
2519 }
2520
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2522 const RegionCodeGenTy &OrderedOpGen,
2523 SourceLocation Loc, bool IsThreads) {
2524 if (!CGF.HaveInsertPoint())
2525 return;
2526 // __kmpc_ordered(ident_t *, gtid);
2527 // OrderedOpGen();
2528 // __kmpc_end_ordered(ident_t *, gtid);
2529 // Prepare arguments and build a call to __kmpc_ordered
2530 if (IsThreads) {
2531 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2532 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2533 CGM.getModule(), OMPRTL___kmpc_ordered),
2534 Args,
2535 OMPBuilder.getOrCreateRuntimeFunction(
2536 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2537 Args);
2538 OrderedOpGen.setAction(Action);
2539 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540 return;
2541 }
2542 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 }
2544
getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2546 unsigned Flags;
2547 if (Kind == OMPD_for)
2548 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2549 else if (Kind == OMPD_sections)
2550 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2551 else if (Kind == OMPD_single)
2552 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2553 else if (Kind == OMPD_barrier)
2554 Flags = OMP_IDENT_BARRIER_EXPL;
2555 else
2556 Flags = OMP_IDENT_BARRIER_IMPL;
2557 return Flags;
2558 }
2559
getDefaultScheduleAndChunk(CodeGenFunction & CGF,const OMPLoopDirective & S,OpenMPScheduleClauseKind & ScheduleKind,const Expr * & ChunkExpr) const2560 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2561 CodeGenFunction &CGF, const OMPLoopDirective &S,
2562 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2563 // Check if the loop directive is actually a doacross loop directive. In this
2564 // case choose static, 1 schedule.
2565 if (llvm::any_of(
2566 S.getClausesOfKind<OMPOrderedClause>(),
2567 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2568 ScheduleKind = OMPC_SCHEDULE_static;
2569 // Chunk size is 1 in this case.
2570 llvm::APInt ChunkSize(32, 1);
2571 ChunkExpr = IntegerLiteral::Create(
2572 CGF.getContext(), ChunkSize,
2573 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2574 SourceLocation());
2575 }
2576 }
2577
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind,bool EmitChecks,bool ForceSimpleCall)2578 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2579 OpenMPDirectiveKind Kind, bool EmitChecks,
2580 bool ForceSimpleCall) {
2581 // Check if we should use the OMPBuilder
2582 auto *OMPRegionInfo =
2583 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2584 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2585 CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
2586 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2587 return;
2588 }
2589
2590 if (!CGF.HaveInsertPoint())
2591 return;
2592 // Build call __kmpc_cancel_barrier(loc, thread_id);
2593 // Build call __kmpc_barrier(loc, thread_id);
2594 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2595 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2596 // thread_id);
2597 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2598 getThreadID(CGF, Loc)};
2599 if (OMPRegionInfo) {
2600 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2601 llvm::Value *Result = CGF.EmitRuntimeCall(
2602 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2603 OMPRTL___kmpc_cancel_barrier),
2604 Args);
2605 if (EmitChecks) {
2606 // if (__kmpc_cancel_barrier()) {
2607 // exit from construct;
2608 // }
2609 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2610 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2611 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2612 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2613 CGF.EmitBlock(ExitBB);
2614 // exit from construct;
2615 CodeGenFunction::JumpDest CancelDestination =
2616 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2617 CGF.EmitBranchThroughCleanup(CancelDestination);
2618 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2619 }
2620 return;
2621 }
2622 }
2623 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2624 CGM.getModule(), OMPRTL___kmpc_barrier),
2625 Args);
2626 }
2627
2628 /// Map the OpenMP loop schedule to the runtime enumeration.
getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,bool Chunked,bool Ordered)2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2630 bool Chunked, bool Ordered) {
2631 switch (ScheduleKind) {
2632 case OMPC_SCHEDULE_static:
2633 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2634 : (Ordered ? OMP_ord_static : OMP_sch_static);
2635 case OMPC_SCHEDULE_dynamic:
2636 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2637 case OMPC_SCHEDULE_guided:
2638 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2639 case OMPC_SCHEDULE_runtime:
2640 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2641 case OMPC_SCHEDULE_auto:
2642 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2643 case OMPC_SCHEDULE_unknown:
2644 assert(!Chunked && "chunk was specified but schedule kind not known");
2645 return Ordered ? OMP_ord_static : OMP_sch_static;
2646 }
2647 llvm_unreachable("Unexpected runtime schedule");
2648 }
2649
2650 /// Map the OpenMP distribute schedule to the runtime enumeration.
2651 static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked)2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2653 // only static is allowed for dist_schedule
2654 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656
isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2658 bool Chunked) const {
2659 OpenMPSchedType Schedule =
2660 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661 return Schedule == OMP_sch_static;
2662 }
2663
isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2664 bool CGOpenMPRuntime::isStaticNonchunked(
2665 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667 return Schedule == OMP_dist_sch_static;
2668 }
2669
isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,bool Chunked) const2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2671 bool Chunked) const {
2672 OpenMPSchedType Schedule =
2673 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674 return Schedule == OMP_sch_static_chunked;
2675 }
2676
isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,bool Chunked) const2677 bool CGOpenMPRuntime::isStaticChunked(
2678 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680 return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682
isDynamic(OpenMPScheduleClauseKind ScheduleKind) const2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2684 OpenMPSchedType Schedule =
2685 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687 return Schedule != OMP_sch_static;
2688 }
2689
addMonoNonMonoModifier(CodeGenModule & CGM,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2)2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2691 OpenMPScheduleClauseModifier M1,
2692 OpenMPScheduleClauseModifier M2) {
2693 int Modifier = 0;
2694 switch (M1) {
2695 case OMPC_SCHEDULE_MODIFIER_monotonic:
2696 Modifier = OMP_sch_modifier_monotonic;
2697 break;
2698 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699 Modifier = OMP_sch_modifier_nonmonotonic;
2700 break;
2701 case OMPC_SCHEDULE_MODIFIER_simd:
2702 if (Schedule == OMP_sch_static_chunked)
2703 Schedule = OMP_sch_static_balanced_chunked;
2704 break;
2705 case OMPC_SCHEDULE_MODIFIER_last:
2706 case OMPC_SCHEDULE_MODIFIER_unknown:
2707 break;
2708 }
2709 switch (M2) {
2710 case OMPC_SCHEDULE_MODIFIER_monotonic:
2711 Modifier = OMP_sch_modifier_monotonic;
2712 break;
2713 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714 Modifier = OMP_sch_modifier_nonmonotonic;
2715 break;
2716 case OMPC_SCHEDULE_MODIFIER_simd:
2717 if (Schedule == OMP_sch_static_chunked)
2718 Schedule = OMP_sch_static_balanced_chunked;
2719 break;
2720 case OMPC_SCHEDULE_MODIFIER_last:
2721 case OMPC_SCHEDULE_MODIFIER_unknown:
2722 break;
2723 }
2724 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2725 // If the static schedule kind is specified or if the ordered clause is
2726 // specified, and if the nonmonotonic modifier is not specified, the effect is
2727 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728 // modifier is specified, the effect is as if the nonmonotonic modifier is
2729 // specified.
2730 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2731 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2732 Schedule == OMP_sch_static_balanced_chunked ||
2733 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2734 Schedule == OMP_dist_sch_static_chunked ||
2735 Schedule == OMP_dist_sch_static))
2736 Modifier = OMP_sch_modifier_nonmonotonic;
2737 }
2738 return Schedule | Modifier;
2739 }
2740
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,const DispatchRTInput & DispatchValues)2741 void CGOpenMPRuntime::emitForDispatchInit(
2742 CodeGenFunction &CGF, SourceLocation Loc,
2743 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2744 bool Ordered, const DispatchRTInput &DispatchValues) {
2745 if (!CGF.HaveInsertPoint())
2746 return;
2747 OpenMPSchedType Schedule = getRuntimeSchedule(
2748 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2749 assert(Ordered ||
2750 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2751 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2752 Schedule != OMP_sch_static_balanced_chunked));
2753 // Call __kmpc_dispatch_init(
2754 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2755 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2756 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2757
2758 // If the Chunk was not specified in the clause - use default value 1.
2759 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2760 : CGF.Builder.getIntN(IVSize, 1);
2761 llvm::Value *Args[] = {
2762 emitUpdateLocation(CGF, Loc),
2763 getThreadID(CGF, Loc),
2764 CGF.Builder.getInt32(addMonoNonMonoModifier(
2765 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2766 DispatchValues.LB, // Lower
2767 DispatchValues.UB, // Upper
2768 CGF.Builder.getIntN(IVSize, 1), // Stride
2769 Chunk // Chunk
2770 };
2771 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2772 }
2773
emitForStaticInitCall(CodeGenFunction & CGF,llvm::Value * UpdateLocation,llvm::Value * ThreadId,llvm::FunctionCallee ForStaticInitFunction,OpenMPSchedType Schedule,OpenMPScheduleClauseModifier M1,OpenMPScheduleClauseModifier M2,const CGOpenMPRuntime::StaticRTInput & Values)2774 static void emitForStaticInitCall(
2775 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2776 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2777 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2778 const CGOpenMPRuntime::StaticRTInput &Values) {
2779 if (!CGF.HaveInsertPoint())
2780 return;
2781
2782 assert(!Values.Ordered);
2783 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2784 Schedule == OMP_sch_static_balanced_chunked ||
2785 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2786 Schedule == OMP_dist_sch_static ||
2787 Schedule == OMP_dist_sch_static_chunked);
2788
2789 // Call __kmpc_for_static_init(
2790 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2791 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2792 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2793 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2794 llvm::Value *Chunk = Values.Chunk;
2795 if (Chunk == nullptr) {
2796 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2797 Schedule == OMP_dist_sch_static) &&
2798 "expected static non-chunked schedule");
2799 // If the Chunk was not specified in the clause - use default value 1.
2800 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2801 } else {
2802 assert((Schedule == OMP_sch_static_chunked ||
2803 Schedule == OMP_sch_static_balanced_chunked ||
2804 Schedule == OMP_ord_static_chunked ||
2805 Schedule == OMP_dist_sch_static_chunked) &&
2806 "expected static chunked schedule");
2807 }
2808 llvm::Value *Args[] = {
2809 UpdateLocation,
2810 ThreadId,
2811 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2812 M2)), // Schedule type
2813 Values.IL.getPointer(), // &isLastIter
2814 Values.LB.getPointer(), // &LB
2815 Values.UB.getPointer(), // &UB
2816 Values.ST.getPointer(), // &Stride
2817 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2818 Chunk // Chunk
2819 };
2820 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2821 }
2822
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)2823 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2824 SourceLocation Loc,
2825 OpenMPDirectiveKind DKind,
2826 const OpenMPScheduleTy &ScheduleKind,
2827 const StaticRTInput &Values) {
2828 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2829 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2830 assert(isOpenMPWorksharingDirective(DKind) &&
2831 "Expected loop-based or sections-based directive.");
2832 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2833 isOpenMPLoopDirective(DKind)
2834 ? OMP_IDENT_WORK_LOOP
2835 : OMP_IDENT_WORK_SECTIONS);
2836 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2837 llvm::FunctionCallee StaticInitFunction =
2838 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2839 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2840 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2841 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2842 }
2843
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const CGOpenMPRuntime::StaticRTInput & Values)2844 void CGOpenMPRuntime::emitDistributeStaticInit(
2845 CodeGenFunction &CGF, SourceLocation Loc,
2846 OpenMPDistScheduleClauseKind SchedKind,
2847 const CGOpenMPRuntime::StaticRTInput &Values) {
2848 OpenMPSchedType ScheduleNum =
2849 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2850 llvm::Value *UpdatedLocation =
2851 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2852 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2853 llvm::FunctionCallee StaticInitFunction =
2854 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2855 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2856 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2857 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2858 }
2859
/// Emit the __kmpc_for_static_fini call that closes a statically scheduled
/// worksharing region. The ident work flag (distribute / loop / sections)
/// is chosen from the directive kind so it mirrors the flag used by the
/// matching static-init call.
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)2860 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2861                                           SourceLocation Loc,
2862                                           OpenMPDirectiveKind DKind) {
2863   if (!CGF.HaveInsertPoint())
2864     return;
2865   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2866   llvm::Value *Args[] = {
2867       emitUpdateLocation(CGF, Loc,
2868                          isOpenMPDistributeDirective(DKind)
2869                              ? OMP_IDENT_WORK_DISTRIBUTE
2870                              : isOpenMPLoopDirective(DKind)
2871                                    ? OMP_IDENT_WORK_LOOP
2872                                    : OMP_IDENT_WORK_SECTIONS),
2873       getThreadID(CGF, Loc)};
     // Attach an artificial debug location so the runtime call is not
     // attributed to user code.
2874   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2875   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2876                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2877                       Args);
2878 }
2879
/// Signal the end of an iteration in an 'ordered' dynamically scheduled
/// loop by calling the dispatch-fini runtime entry selected by the IV
/// size/signedness.
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)2880 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2881                                                  SourceLocation Loc,
2882                                                  unsigned IVSize,
2883                                                  bool IVSigned) {
2884   if (!CGF.HaveInsertPoint())
2885     return;
2886   // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2887   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2888   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2889 }
2890
/// Request the next chunk of a dynamically scheduled loop via
/// __kmpc_dispatch_next. The runtime fills in the last-iteration flag and
/// the lower/upper/stride out-parameters; the kmp_int32 result is
/// converted to a bool ("more work available").
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)2891 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2892                                           SourceLocation Loc, unsigned IVSize,
2893                                           bool IVSigned, Address IL,
2894                                           Address LB, Address UB,
2895                                           Address ST) {
2896   // Call __kmpc_dispatch_next(
2897   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2898   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2899   //          kmp_int[32|64] *p_stride);
2900   llvm::Value *Args[] = {
2901       emitUpdateLocation(CGF, Loc),
2902       getThreadID(CGF, Loc),
2903       IL.getPointer(), // &isLastIter
2904       LB.getPointer(), // &Lower
2905       UB.getPointer(), // &Upper
2906       ST.getPointer()  // &Stride
2907   };
2908   llvm::Value *Call =
2909       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
     // The runtime returns a kmp_int32; hand callers an i1 bool instead.
2910   return CGF.EmitScalarConversion(
2911       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2912       CGF.getContext().BoolTy, Loc);
2913 }
2914
/// Emit the __kmpc_push_num_threads call implementing a num_threads
/// clause; the requested thread count is truncated/extended to i32.
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)2915 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2916                                            llvm::Value *NumThreads,
2917                                            SourceLocation Loc) {
2918   if (!CGF.HaveInsertPoint())
2919     return;
2920   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2921   llvm::Value *Args[] = {
2922       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2923       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2924   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2925                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2926                       Args);
2927 }
2928
/// Emit the __kmpc_push_proc_bind call implementing a proc_bind clause.
/// The ProcBind enumerator is passed to the runtime as an integer constant.
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)2929 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2930                                          ProcBindKind ProcBind,
2931                                          SourceLocation Loc) {
2932   if (!CGF.HaveInsertPoint())
2933     return;
2934   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2935   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2936   llvm::Value *Args[] = {
2937       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2938       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2939   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2940                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2941                       Args);
2942 }
2943
/// Emit an OpenMP 'flush'. When the OpenMPIRBuilder is enabled, delegate
/// flush creation to it; otherwise emit a direct __kmpc_flush call. The
/// flushed-variable list and ordering are currently unused by both paths.
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * >,SourceLocation Loc,llvm::AtomicOrdering AO)2944 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2945                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2946   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2947     OMPBuilder.CreateFlush(CGF.Builder);
2948   } else {
2949     if (!CGF.HaveInsertPoint())
2950       return;
2951     // Build call void __kmpc_flush(ident_t *loc)
2952     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2953                             CGM.getModule(), OMPRTL___kmpc_flush),
2954                         emitUpdateLocation(CGF, Loc));
2955   }
2956 }
2957
2958 namespace {
2959 /// Indexes of fields for type kmp_task_t.
/// The enumerator order must match the field order built in
/// createKmpTaskTRecordDecl() below.
2960 enum KmpTaskTFields {
2961   /// List of shared variables.
2962   KmpTaskTShareds,
2963   /// Task routine.
2964   KmpTaskTRoutine,
2965   /// Partition id for the untied tasks.
2966   KmpTaskTPartId,
2967   /// Function with call of destructors for private variables.
2968   Data1,
2969   /// Task priority.
2970   Data2,
2971   /// (Taskloops only) Lower bound.
2972   KmpTaskTLowerBound,
2973   /// (Taskloops only) Upper bound.
2974   KmpTaskTUpperBound,
2975   /// (Taskloops only) Stride.
2976   KmpTaskTStride,
2977   /// (Taskloops only) Is last iteration flag.
2978   KmpTaskTLastIter,
2979   /// (Taskloops only) Reduction data.
2980   KmpTaskTReductions,
2981 };
2982 } // anonymous namespace
2983
empty() const2984 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2985 return OffloadEntriesTargetRegion.empty() &&
2986 OffloadEntriesDeviceGlobalVar.empty();
2987 }
2988
2989 /// Initialize target region entry.
/// Device-side only: records a placeholder entry (no address/ID yet) at the
/// given creation order so the device build can later match it against the
/// host metadata.
2990 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,unsigned Order)2991     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2992                                     StringRef ParentName, unsigned LineNum,
2993                                     unsigned Order) {
2994   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2995                                              "only required for the device "
2996                                              "code generation.");
2997   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2998       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2999                                    OMPTargetRegionEntryTargetRegion);
3000   ++OffloadingEntriesNum;
3001 }
3002
/// Register a target region entry with its address, ID and flags. On the
/// device side the entry must already have been initialized from host
/// metadata (a missing entry is reported as an error); on the host side a
/// fresh entry is created and the global entry counter advanced.
3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum,llvm::Constant * Addr,llvm::Constant * ID,OMPTargetRegionEntryKind Flags)3004     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3005                                   StringRef ParentName, unsigned LineNum,
3006                                   llvm::Constant *Addr, llvm::Constant *ID,
3007                                   OMPTargetRegionEntryKind Flags) {
3008   // If we are emitting code for a target, the entry is already initialized,
3009   // only has to be registered.
3010   if (CGM.getLangOpts().OpenMPIsDevice) {
3011     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3012       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3013           DiagnosticsEngine::Error,
3014           "Unable to find target region on line '%0' in the device code.");
3015       CGM.getDiags().Report(DiagID) << LineNum;
3016       return;
3017     }
3018     auto &Entry =
3019         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3020     assert(Entry.isValid() && "Entry not initialized!");
3021     Entry.setAddress(Addr);
3022     Entry.setID(ID);
3023     Entry.setFlags(Flags);
3024   } else {
3025     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3026     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3027     ++OffloadingEntriesNum;
3028   }
3029 }
3030
hasTargetRegionEntryInfo(unsigned DeviceID,unsigned FileID,StringRef ParentName,unsigned LineNum) const3031 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3032 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3033 unsigned LineNum) const {
3034 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3035 if (PerDevice == OffloadEntriesTargetRegion.end())
3036 return false;
3037 auto PerFile = PerDevice->second.find(FileID);
3038 if (PerFile == PerDevice->second.end())
3039 return false;
3040 auto PerParentName = PerFile->second.find(ParentName);
3041 if (PerParentName == PerFile->second.end())
3042 return false;
3043 auto PerLine = PerParentName->second.find(LineNum);
3044 if (PerLine == PerParentName->second.end())
3045 return false;
3046 // Fail if this entry is already registered.
3047 if (PerLine->second.getAddress() || PerLine->second.getID())
3048 return false;
3049 return true;
3050 }
3051
/// Invoke \p Action for every registered target region entry, passing the
/// device ID, file ID, parent function name, line number and entry record.
actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy & Action)3052 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3053     const OffloadTargetRegionEntryInfoActTy &Action) {
3054   // Scan all target region entries and perform the provided action.
3055   for (const auto &D : OffloadEntriesTargetRegion)
3056     for (const auto &F : D.second)
3057       for (const auto &P : F.second)
3058         for (const auto &L : P.second)
           // P.first() is the StringMap key: the parent function name.
3059           Action(D.first, F.first, P.first(), L.first, L.second);
3060 }
3061
/// Device-side only: record a placeholder entry for a declare-target
/// global variable (no address/size yet) at the given creation order.
3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeDeviceGlobalVarEntryInfo(StringRef Name,OMPTargetGlobalVarEntryKind Flags,unsigned Order)3063     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3064                                        OMPTargetGlobalVarEntryKind Flags,
3065                                        unsigned Order) {
3066   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3067                                              "only required for the device "
3068                                              "code generation.");
3069   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3070   ++OffloadingEntriesNum;
3071 }
3072
/// Register a declare-target global variable entry with its address, size,
/// flags and linkage. On the device side the entry must already exist; on
/// the host side an entry is created on first registration. A repeated
/// registration only fills in a previously-zero size/linkage and otherwise
/// leaves the entry untouched.
3073 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerDeviceGlobalVarEntryInfo(StringRef VarName,llvm::Constant * Addr,CharUnits VarSize,OMPTargetGlobalVarEntryKind Flags,llvm::GlobalValue::LinkageTypes Linkage)3074     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3075                                      CharUnits VarSize,
3076                                      OMPTargetGlobalVarEntryKind Flags,
3077                                      llvm::GlobalValue::LinkageTypes Linkage) {
3078   if (CGM.getLangOpts().OpenMPIsDevice) {
3079     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3080     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3081            "Entry not initialized!");
3082     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3083            "Resetting with the new address.");
3084     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
       // Already registered: only a missing (zero) size may be filled in.
3085       if (Entry.getVarSize().isZero()) {
3086         Entry.setVarSize(VarSize);
3087         Entry.setLinkage(Linkage);
3088       }
3089       return;
3090     }
3091     Entry.setVarSize(VarSize);
3092     Entry.setLinkage(Linkage);
3093     Entry.setAddress(Addr);
3094   } else {
3095     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3096       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3097       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3098              "Entry not initialized!");
3099       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3100              "Resetting with the new address.");
       // Already registered on the host: only a missing size may be filled in.
3101       if (Entry.getVarSize().isZero()) {
3102         Entry.setVarSize(VarSize);
3103         Entry.setLinkage(Linkage);
3104       }
3105       return;
3106     }
3107     OffloadEntriesDeviceGlobalVar.try_emplace(
3108         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3109     ++OffloadingEntriesNum;
3110   }
3111 }
3112
3113 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy & Action)3114 actOnDeviceGlobalVarEntriesInfo(
3115 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3116 // Scan all target region entries and perform the provided action.
3117 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3118 Action(E.getKey(), E.getValue());
3119 }
3120
/// Materialize one __tgt_offload_entry global for the given entry: an
/// internal constant string holding the entry's name plus a packed struct
/// {addr, name, size, flags, reserved} placed in the
/// "omp_offloading_entries" section so the linker groups all entries into
/// the table the offload runtime scans.
createOffloadEntry(llvm::Constant * ID,llvm::Constant * Addr,uint64_t Size,int32_t Flags,llvm::GlobalValue::LinkageTypes Linkage)3121 void CGOpenMPRuntime::createOffloadEntry(
3122     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3123     llvm::GlobalValue::LinkageTypes Linkage) {
3124   StringRef Name = Addr->getName();
3125   llvm::Module &M = CGM.getModule();
3126   llvm::LLVMContext &C = M.getContext();
3127
3128   // Create constant string with the name.
3129   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3130
3131   std::string StringName = getName({"omp_offloading", "entry_name"});
3132   auto *Str = new llvm::GlobalVariable(
3133       M, StrPtrInit->getType(), /*isConstant=*/true,
3134       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3135   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3136
3137   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3138                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3139                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3140                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3141                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3142   std::string EntryName = getName({"omp_offloading", "entry", ""});
3143   llvm::GlobalVariable *Entry = createGlobalStruct(
3144       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3145       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3146
3147   // The entry has to be created in the section the linker expects it to be.
3148   Entry->setSection("omp_offloading_entries");
3149 }
3150
/// Emit all collected offload entries as __tgt_offload_entry globals and
/// record the matching "omp_offload.info" named metadata that the device
/// compilation consumes (see loadOffloadInfoMetadata()). Entries that were
/// initialized but never completed are diagnosed here.
createOffloadEntriesAndInfoMetadata()3151 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3152   // Emit the offloading entries and metadata so that the device codegen side
3153   // can easily figure out what to emit. The produced metadata looks like
3154   // this:
3155   //
3156   // !omp_offload.info = !{!1, ...}
3157   //
3158   // Right now we only generate metadata for functions that contain target
3159   // regions.
3160
3161   // If we are in simd mode or there are no entries, we don't need to do
3162   // anything.
3163   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3164     return;
3165
3166   llvm::Module &M = CGM.getModule();
3167   llvm::LLVMContext &C = M.getContext();
3168   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3169                          SourceLocation, StringRef>,
3170               16>
3171       OrderedEntries(OffloadEntriesInfoManager.size());
3172   llvm::SmallVector<StringRef, 16> ParentFunctions(
3173       OffloadEntriesInfoManager.size());
3174
3175   // Auxiliary methods to create metadata values and strings.
3176   auto &&GetMDInt = [this](unsigned V) {
3177     return llvm::ConstantAsMetadata::get(
3178         llvm::ConstantInt::get(CGM.Int32Ty, V));
3179   };
3180
3181   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3182
3183   // Create the offloading info metadata node.
3184   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3185
3186   // Create function that emits metadata for each target region entry;
3187   auto &&TargetRegionMetadataEmitter =
3188       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3189        &GetMDString](
3190           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3191           unsigned Line,
3192           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3193         // Generate metadata for target regions. Each entry of this metadata
3194         // contains:
3195         // - Entry 0 -> Kind of this type of metadata (0).
3196         // - Entry 1 -> Device ID of the file where the entry was identified.
3197         // - Entry 2 -> File ID of the file where the entry was identified.
3198         // - Entry 3 -> Mangled name of the function where the entry was
3199         // identified.
3200         // - Entry 4 -> Line in the file where the entry was identified.
3201         // - Entry 5 -> Order the entry was created.
3202         // The first element of the metadata node is the kind.
3203         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3204                                  GetMDInt(FileID), GetMDString(ParentName),
3205                                  GetMDInt(Line), GetMDInt(E.getOrder())};
3206
         // Recover a SourceLocation for diagnostics by matching the
         // (DeviceID, FileID) pair against the files the SourceManager knows.
3207         SourceLocation Loc;
3208         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3209                   E = CGM.getContext().getSourceManager().fileinfo_end();
3210              I != E; ++I) {
3211           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3212               I->getFirst()->getUniqueID().getFile() == FileID) {
3213             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3214                 I->getFirst(), Line, 1);
3215             break;
3216           }
3217         }
3218         // Save this entry in the right position of the ordered entries array.
3219         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3220         ParentFunctions[E.getOrder()] = ParentName;
3221
3222         // Add metadata to the named metadata node.
3223         MD->addOperand(llvm::MDNode::get(C, Ops));
3224       };
3225
3226   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3227       TargetRegionMetadataEmitter);
3228
3229   // Create function that emits metadata for each device global variable entry;
3230   auto &&DeviceGlobalVarMetadataEmitter =
3231       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3232        MD](StringRef MangledName,
3233            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3234                &E) {
3235         // Generate metadata for global variables. Each entry of this metadata
3236         // contains:
3237         // - Entry 0 -> Kind of this type of metadata (1).
3238         // - Entry 1 -> Mangled name of the variable.
3239         // - Entry 2 -> Declare target kind.
3240         // - Entry 3 -> Order the entry was created.
3241         // The first element of the metadata node is the kind.
3242         llvm::Metadata *Ops[] = {
3243             GetMDInt(E.getKind()), GetMDString(MangledName),
3244             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3245
3246         // Save this entry in the right position of the ordered entries array.
3247         OrderedEntries[E.getOrder()] =
3248             std::make_tuple(&E, SourceLocation(), MangledName);
3249
3250         // Add metadata to the named metadata node.
3251         MD->addOperand(llvm::MDNode::get(C, Ops));
3252       };
3253
3254   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3255       DeviceGlobalVarMetadataEmitter);
3256
   // Now walk the entries in creation order and emit the actual
   // __tgt_offload_entry globals, diagnosing incomplete entries.
3257   for (const auto &E : OrderedEntries) {
3258     assert(std::get<0>(E) && "All ordered entries must exist!");
3259     if (const auto *CE =
3260             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3261                 std::get<0>(E))) {
3262       if (!CE->getID() || !CE->getAddress()) {
3263         // Do not blame the entry if the parent function is not emitted.
3264         StringRef FnName = ParentFunctions[CE->getOrder()];
3265         if (!CGM.GetGlobalValue(FnName))
3266           continue;
3267         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3268             DiagnosticsEngine::Error,
3269             "Offloading entry for target region in %0 is incorrect: either the "
3270             "address or the ID is invalid.");
3271         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3272         continue;
3273       }
3274       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3275                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3276     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3277                                              OffloadEntryInfoDeviceGlobalVar>(
3278                    std::get<0>(E))) {
3279       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3280           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3281               CE->getFlags());
3282       switch (Flags) {
3283       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3284         if (CGM.getLangOpts().OpenMPIsDevice &&
3285             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3286           continue;
3287         if (!CE->getAddress()) {
3288           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3289               DiagnosticsEngine::Error, "Offloading entry for declare target "
3290                                         "variable %0 is incorrect: the "
3291                                         "address is invalid.");
3292           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3293           continue;
3294         }
3295         // The variable has no definition - no need to add the entry.
3296         if (CE->getVarSize().isZero())
3297           continue;
3298         break;
3299       }
3300       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3301         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3302                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3303                "Declaret target link address is set.");
3304         if (CGM.getLangOpts().OpenMPIsDevice)
3305           continue;
3306         if (!CE->getAddress()) {
3307           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3308               DiagnosticsEngine::Error,
3309               "Offloading entry for declare target variable is incorrect: the "
3310               "address is invalid.");
3311           CGM.getDiags().Report(DiagID);
3312           continue;
3313         }
3314         break;
3315       }
3316       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3317                          CE->getVarSize().getQuantity(), Flags,
3318                          CE->getLinkage());
3319     } else {
3320       llvm_unreachable("Unsupported entry kind.");
3321     }
3322   }
3323 }
3324
3325 /// Loads all the offload entries information from the host IR
3326 /// metadata.
loadOffloadInfoMetadata()3327 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3328   // If we are in target mode, load the metadata from the host IR. This code has
3329   // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3330
3331   if (!CGM.getLangOpts().OpenMPIsDevice)
3332     return;
3333
3334   if (CGM.getLangOpts().OMPHostIRFile.empty())
3335     return;
3336
3337   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3338   if (auto EC = Buf.getError()) {
3339     CGM.getDiags().Report(diag::err_cannot_open_file)
3340         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3341     return;
3342   }
3343
   // Parse the host bitcode in a private context; only its metadata is read.
3344   llvm::LLVMContext C;
3345   auto ME = expectedToErrorOrAndEmitErrors(
3346       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3347
3348   if (auto EC = ME.getError()) {
3349     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3350         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3351     CGM.getDiags().Report(DiagID)
3352         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3353     return;
3354   }
3355
3356   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3357   if (!MD)
3358     return;
3359
3360   for (llvm::MDNode *MN : MD->operands()) {
3361     auto &&GetMDInt = [MN](unsigned Idx) {
3362       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3363       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3364     };
3365
3366     auto &&GetMDString = [MN](unsigned Idx) {
3367       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3368       return V->getString();
3369     };
3370
     // Operand 0 is the entry kind; the remaining operand layout matches
     // the emitters in createOffloadEntriesAndInfoMetadata().
3371     switch (GetMDInt(0)) {
3372     default:
3373       llvm_unreachable("Unexpected metadata!");
3374       break;
3375     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3376         OffloadingEntryInfoTargetRegion:
3377       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3378           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3379           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3380           /*Order=*/GetMDInt(5));
3381       break;
3382     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3383         OffloadingEntryInfoDeviceGlobalVar:
3384       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3385           /*MangledName=*/GetMDString(1),
3386           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3387               /*Flags=*/GetMDInt(2)),
3388           /*Order=*/GetMDInt(3));
3389       break;
3390     }
3391   }
3392 }
3393
/// Lazily build the kmp_routine_entry_t type (both its QualType and its
/// lowered LLVM type) used for task entry routines. Idempotent: later calls
/// are no-ops once KmpRoutineEntryPtrTy is set.
emitKmpRoutineEntryT(QualType KmpInt32Ty)3394 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3395   if (!KmpRoutineEntryPtrTy) {
3396     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3397     ASTContext &C = CGM.getContext();
3398     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3399     FunctionProtoType::ExtProtoInfo EPI;
3400     KmpRoutineEntryPtrQTy = C.getPointerType(
3401         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3402     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3403   }
3404 }
3405
/// Lazily build and cache the packed __tgt_offload_entry record type that
/// describes one entry in the offload entries table consumed by the
/// offloading runtime.
getTgtOffloadEntryQTy()3406 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3407   // Make sure the type of the entry is already created. This is the type we
3408   // have to create:
3409   // struct __tgt_offload_entry{
3410   //   void      *addr;       // Pointer to the offload entry info.
3411   //                          // (function or global)
3412   //   char      *name;       // Name of the function or global.
3413   //   size_t     size;       // Size of the entry info (0 if it is a function).
3414   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3415   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3416   // };
3417   if (TgtOffloadEntryQTy.isNull()) {
3418     ASTContext &C = CGM.getContext();
3419     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3420     RD->startDefinition();
3421     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3422     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3423     addFieldToRecordDecl(C, RD, C.getSizeType());
3424     addFieldToRecordDecl(
3425         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3426     addFieldToRecordDecl(
3427         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3428     RD->completeDefinition();
     // The runtime expects this layout with no padding.
3429     RD->addAttr(PackedAttr::CreateImplicit(C));
3430     TgtOffloadEntryQTy = C.getRecordType(RD);
3431   }
3432   return TgtOffloadEntryQTy;
3433 }
3434
3435 namespace {
/// Bundles the AST pieces describing one task private variable: the
/// referencing expression, the original declaration, its private copy, and
/// the declaration used to initialize each private element (for firstprivate).
3436 struct PrivateHelpersTy {
PrivateHelpersTy__anone0633a091611::PrivateHelpersTy3437   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3438                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3439       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3440         PrivateElemInit(PrivateElemInit) {}
3441   const Expr *OriginalRef = nullptr;
3442   const VarDecl *Original = nullptr;
3443   const VarDecl *PrivateCopy = nullptr;
3444   const VarDecl *PrivateElemInit = nullptr;
3445 };
// A private variable paired with its required alignment.
3446 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3447 } // anonymous namespace
3448
3449 static RecordDecl *
createPrivatesRecordDecl(CodeGenModule & CGM,ArrayRef<PrivateDataTy> Privates)3450 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3451 if (!Privates.empty()) {
3452 ASTContext &C = CGM.getContext();
3453 // Build struct .kmp_privates_t. {
3454 // /* private vars */
3455 // };
3456 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3457 RD->startDefinition();
3458 for (const auto &Pair : Privates) {
3459 const VarDecl *VD = Pair.second.Original;
3460 QualType Type = VD->getType().getNonReferenceType();
3461 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3462 if (VD->hasAttrs()) {
3463 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3464 E(VD->getAttrs().end());
3465 I != E; ++I)
3466 FD->addAttr(*I);
3467 }
3468 }
3469 RD->completeDefinition();
3470 return RD;
3471 }
3472 return nullptr;
3473 }
3474
/// Build the implicit kmp_task_t record (and its kmp_cmplrdata_t union
/// member type). The field order must match the KmpTaskTFields enum above;
/// the taskloop-only fields are appended only for taskloop directives.
3475 static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule & CGM,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpRoutineEntryPointerQTy)3476 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3477                          QualType KmpInt32Ty,
3478                          QualType KmpRoutineEntryPointerQTy) {
3479   ASTContext &C = CGM.getContext();
3480   // Build struct kmp_task_t {
3481   //         void *              shareds;
3482   //         kmp_routine_entry_t routine;
3483   //         kmp_int32           part_id;
3484   //         kmp_cmplrdata_t data1;
3485   //         kmp_cmplrdata_t data2;
3486   // For taskloops additional fields:
3487   //         kmp_uint64          lb;
3488   //         kmp_uint64          ub;
3489   //         kmp_int64           st;
3490   //         kmp_int32           liter;
3491   //         void *              reductions;
3492   //       };
3493   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3494   UD->startDefinition();
3495   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3496   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3497   UD->completeDefinition();
3498   QualType KmpCmplrdataTy = C.getRecordType(UD);
3499   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3500   RD->startDefinition();
3501   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3502   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3503   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3504   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3505   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3506   if (isOpenMPTaskLoopDirective(Kind)) {
3507     QualType KmpUInt64Ty =
3508         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3509     QualType KmpInt64Ty =
3510         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3511     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3512     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3513     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3514     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3515     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3516   }
3517   RD->completeDefinition();
3518   return RD;
3519 }
3520
/// Build the implicit kmp_task_t_with_privates record: the kmp_task_t
/// header followed (only when there are privates) by the privates record.
3521 static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule & CGM,QualType KmpTaskTQTy,ArrayRef<PrivateDataTy> Privates)3522 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3523                                      ArrayRef<PrivateDataTy> Privates) {
3524   ASTContext &C = CGM.getContext();
3525   // Build struct kmp_task_t_with_privates {
3526   //         kmp_task_t task_data;
3527   //         .kmp_privates_t. privates;
3528   //       };
3529   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3530   RD->startDefinition();
3531   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3532   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3533     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3534   RD->completeDefinition();
3535   return RD;
3536 }
3537
3538 /// Emit a proxy function which accepts kmp_task_t as the second
3539 /// argument.
3540 /// \code
3541 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3542 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3543 ///   For taskloops:
3544 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3545 ///   tt->reductions, tt->shareds);
3546 ///   return 0;
3547 /// }
3548 /// \endcode
3549 static llvm::Function *
emitProxyTaskFunction(CodeGenModule & CGM,SourceLocation Loc,OpenMPDirectiveKind Kind,QualType KmpInt32Ty,QualType KmpTaskTWithPrivatesPtrQTy,QualType KmpTaskTWithPrivatesQTy,QualType KmpTaskTQTy,QualType SharedsPtrTy,llvm::Function * TaskFunction,llvm::Value * TaskPrivatesMap)3550 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3551                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3552                       QualType KmpTaskTWithPrivatesPtrQTy,
3553                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3554                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3555                       llvm::Value *TaskPrivatesMap) {
3556   ASTContext &C = CGM.getContext();
   // The proxy takes (gtid, kmp_task_t_with_privates *restrict tt).
3557   FunctionArgList Args;
3558   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3559                             ImplicitParamDecl::Other);
3560   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3561                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3562                                 ImplicitParamDecl::Other);
3563   Args.push_back(&GtidArg);
3564   Args.push_back(&TaskTypeArg);
3565   const auto &TaskEntryFnInfo =
3566       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3567   llvm::FunctionType *TaskEntryTy =
3568       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3569   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3570   auto *TaskEntry = llvm::Function::Create(
3571       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3572   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3573   TaskEntry->setDoesNotRecurse();
3574   CodeGenFunction CGF(CGM);
3575   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3576                     Loc, Loc);
3577
3578   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3579   // tt,
3580   // For taskloops:
3581   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3582   // tt->task_data.shareds);
3583   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3584       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3585   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3586       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3587       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3588   const auto *KmpTaskTWithPrivatesQTyRD =
3589       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
   // Base is the kmp_task_t header (first field of kmp_task_t_with_privates).
3590   LValue Base =
3591       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3592   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3593   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3594   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3595   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3596
3597   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3598   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3599   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3600       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3601       CGF.ConvertTypeForMem(SharedsPtrTy));
3602
   // The privates block (second field) may be absent when the task has no
   // private variables; pass a null void* in that case.
3603   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3604   llvm::Value *PrivatesParam;
3605   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3606     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3607     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3608         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3609   } else {
3610     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3611   }
3612
3613   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3614                                TaskPrivatesMap,
3615                                CGF.Builder
3616                                    .CreatePointerBitCastOrAddrSpaceCast(
3617                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3618                                    .getPointer()};
3619   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3620                                           std::end(CommonArgs));
   // Taskloops additionally receive lb/ub/st/liter/reductions loaded from
   // the taskloop-only kmp_task_t fields.
3621   if (isOpenMPTaskLoopDirective(Kind)) {
3622     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3623     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3624     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3625     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3626     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3627     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3628     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3629     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3630     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3631     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3632     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3633     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3634     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3635     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3636     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3637     CallArgs.push_back(LBParam);
3638     CallArgs.push_back(UBParam);
3639     CallArgs.push_back(StParam);
3640     CallArgs.push_back(LIParam);
3641     CallArgs.push_back(RParam);
3642   }
3643   CallArgs.push_back(SharedsParam);
3644
3645   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3646                                                   CallArgs);
   // The runtime expects the entry to return 0.
3647   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3648                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3649   CGF.FinishFunction();
3650   return TaskEntry;
3651 }
3652
/// Emit the destructor helper invoked by the runtime when a task whose
/// private/firstprivate copies need non-trivial destruction completes:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt);
/// \endcode
/// \return the generated internal-linkage destructor function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Match the runtime's entry signature: (gtid, task descriptor).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Load the task descriptor and step to its second field — the record that
  // holds the private copies (the first field is the kmp_task_t header).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destructor cleanup for every private field whose type requires
  // destruction; the cleanups run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3701
3702 /// Emit a privates mapping function for correct handling of private and
3703 /// firstprivate variables.
3704 /// \code
3705 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3706 /// **noalias priv1,..., <tyn> **noalias privn) {
3707 /// *priv1 = &.privates.priv1;
3708 /// ...;
3709 /// *privn = &.privates.privn;
3710 /// }
3711 /// \endcode
3712 static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule & CGM,SourceLocation Loc,ArrayRef<const Expr * > PrivateVars,ArrayRef<const Expr * > FirstprivateVars,ArrayRef<const Expr * > LastprivateVars,QualType PrivatesQTy,ArrayRef<PrivateDataTy> Privates)3713 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3714 ArrayRef<const Expr *> PrivateVars,
3715 ArrayRef<const Expr *> FirstprivateVars,
3716 ArrayRef<const Expr *> LastprivateVars,
3717 QualType PrivatesQTy,
3718 ArrayRef<PrivateDataTy> Privates) {
3719 ASTContext &C = CGM.getContext();
3720 FunctionArgList Args;
3721 ImplicitParamDecl TaskPrivatesArg(
3722 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3723 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3724 ImplicitParamDecl::Other);
3725 Args.push_back(&TaskPrivatesArg);
3726 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3727 unsigned Counter = 1;
3728 for (const Expr *E : PrivateVars) {
3729 Args.push_back(ImplicitParamDecl::Create(
3730 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3731 C.getPointerType(C.getPointerType(E->getType()))
3732 .withConst()
3733 .withRestrict(),
3734 ImplicitParamDecl::Other));
3735 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3736 PrivateVarsPos[VD] = Counter;
3737 ++Counter;
3738 }
3739 for (const Expr *E : FirstprivateVars) {
3740 Args.push_back(ImplicitParamDecl::Create(
3741 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3742 C.getPointerType(C.getPointerType(E->getType()))
3743 .withConst()
3744 .withRestrict(),
3745 ImplicitParamDecl::Other));
3746 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3747 PrivateVarsPos[VD] = Counter;
3748 ++Counter;
3749 }
3750 for (const Expr *E : LastprivateVars) {
3751 Args.push_back(ImplicitParamDecl::Create(
3752 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3753 C.getPointerType(C.getPointerType(E->getType()))
3754 .withConst()
3755 .withRestrict(),
3756 ImplicitParamDecl::Other));
3757 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3758 PrivateVarsPos[VD] = Counter;
3759 ++Counter;
3760 }
3761 const auto &TaskPrivatesMapFnInfo =
3762 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3763 llvm::FunctionType *TaskPrivatesMapTy =
3764 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3765 std::string Name =
3766 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3767 auto *TaskPrivatesMap = llvm::Function::Create(
3768 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3769 &CGM.getModule());
3770 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3771 TaskPrivatesMapFnInfo);
3772 if (CGM.getLangOpts().Optimize) {
3773 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3774 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3775 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3776 }
3777 CodeGenFunction CGF(CGM);
3778 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3779 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3780
3781 // *privi = &.privates.privi;
3782 LValue Base = CGF.EmitLoadOfPointerLValue(
3783 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3784 TaskPrivatesArg.getType()->castAs<PointerType>());
3785 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3786 Counter = 0;
3787 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3788 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3789 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3790 LValue RefLVal =
3791 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3792 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3793 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3794 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3795 ++Counter;
3796 }
3797 CGF.FinishFunction();
3798 return TaskPrivatesMap;
3799 }
3800
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates (private/firstprivate/lastprivate copies, sorted to
/// match the fields of the generated privates record) and emits each copy's
/// initializer into the corresponding field of the task descriptor \p TDBase.
/// \param KmpTaskSharedsPtr Address of the shareds block that firstprivate
///        copies are initialized from; may be invalid when unused.
/// \param ForDup true when emitting the body of the task duplication helper
///        (taskloop), where only non-trivial construction is re-run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of the task record holds the private copies.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Privates is sorted in field order, so FI tracks the field that matches
  // each Pair.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating a task only non-trivial constructor calls need to be
    // re-emitted; everything else was already copied.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the task_dup helper read the value through the source task's
          // shareds block (SrcBase), with the original decl's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: emit through the
          // current capture machinery directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the source element and run the
          // copy-initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Private/lastprivate: default-initialize the copy.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3917
3918 /// Check if duplication function is required for taskloops.
checkInitIsRequired(CodeGenFunction & CGF,ArrayRef<PrivateDataTy> Privates)3919 static bool checkInitIsRequired(CodeGenFunction &CGF,
3920 ArrayRef<PrivateDataTy> Privates) {
3921 bool InitRequired = false;
3922 for (const PrivateDataTy &Pair : Privates) {
3923 const VarDecl *VD = Pair.second.PrivateCopy;
3924 const Expr *Init = VD->getAnyInitializer();
3925 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3926 !CGF.isTrivialInitializer(Init));
3927 if (InitRequired)
3928 break;
3929 }
3930 return InitRequired;
3931 }
3932
3933
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Runtime signature: (destination task, source task, lastprivate flag).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivate copies are initialized from the *source* task's shareds
    // block, so load its pointer through SrcArg.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4012
4013 /// Checks if destructor function is required to be generated.
4014 /// \return true if cleanups are required, false otherwise.
4015 static bool
checkDestructorsRequired(const RecordDecl * KmpTaskTWithPrivatesQTyRD)4016 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4017 bool NeedsCleanup = false;
4018 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4019 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4020 for (const FieldDecl *FD : PrivateRD->fields()) {
4021 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4022 if (NeedsCleanup)
4023 break;
4024 }
4025 return NeedsCleanup;
4026 }
4027
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator variables and opens
/// one loop per iterator (counter init, bound check and the iterator-update
/// statement), while the destructor closes the loops in reverse order
/// (counter increment, back-branch and exit block). IR emitted between
/// construction and destruction therefore executes once per point of the
/// iteration space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator "next iteration" and "exit" jump targets, pushed in
  // declaration order by the constructor and consumed in reverse by the
  // destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatize the iterator and counter variables and emit the loop headers.
  /// A null \p E makes the scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Emit the upper bounds first, before any privatization takes effect.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Close the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4106
4107 static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction & CGF,const Expr * E)4108 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4109 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4110 llvm::Value *Addr;
4111 if (OASE) {
4112 const Expr *Base = OASE->getBase();
4113 Addr = CGF.EmitScalarExpr(Base);
4114 } else {
4115 Addr = CGF.EmitLValue(E).getPointer(CGF);
4116 }
4117 llvm::Value *SizeVal;
4118 QualType Ty = E->getType();
4119 if (OASE) {
4120 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4121 for (const Expr *SE : OASE->getDimensions()) {
4122 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4123 Sz = CGF.EmitScalarConversion(
4124 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4125 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4126 }
4127 } else if (const auto *ASE =
4128 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4129 LValue UpAddrLVal =
4130 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4131 llvm::Value *UpAddr =
4132 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4133 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4134 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4135 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4136 } else {
4137 SizeVal = CGF.getTypeSize(Ty);
4138 }
4139 return std::make_pair(Addr, SizeVal);
4140 }
4141
4142 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getKmpAffinityType(ASTContext & C,QualType & KmpTaskAffinityInfoTy)4143 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4144 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4145 if (KmpTaskAffinityInfoTy.isNull()) {
4146 RecordDecl *KmpAffinityInfoRD =
4147 C.buildImplicitRecord("kmp_task_affinity_info_t");
4148 KmpAffinityInfoRD->startDefinition();
4149 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4150 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4151 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4152 KmpAffinityInfoRD->completeDefinition();
4153 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4154 }
4155 }
4156
4157 CGOpenMPRuntime::TaskResultTy
emitTaskInit(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const OMPTaskDataTy & Data)4158 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4159 const OMPExecutableDirective &D,
4160 llvm::Function *TaskFunction, QualType SharedsTy,
4161 Address Shareds, const OMPTaskDataTy &Data) {
4162 ASTContext &C = CGM.getContext();
4163 llvm::SmallVector<PrivateDataTy, 4> Privates;
4164 // Aggregate privates and sort them by the alignment.
4165 const auto *I = Data.PrivateCopies.begin();
4166 for (const Expr *E : Data.PrivateVars) {
4167 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4168 Privates.emplace_back(
4169 C.getDeclAlign(VD),
4170 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4171 /*PrivateElemInit=*/nullptr));
4172 ++I;
4173 }
4174 I = Data.FirstprivateCopies.begin();
4175 const auto *IElemInitRef = Data.FirstprivateInits.begin();
4176 for (const Expr *E : Data.FirstprivateVars) {
4177 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4178 Privates.emplace_back(
4179 C.getDeclAlign(VD),
4180 PrivateHelpersTy(
4181 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4182 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4183 ++I;
4184 ++IElemInitRef;
4185 }
4186 I = Data.LastprivateCopies.begin();
4187 for (const Expr *E : Data.LastprivateVars) {
4188 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4189 Privates.emplace_back(
4190 C.getDeclAlign(VD),
4191 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4192 /*PrivateElemInit=*/nullptr));
4193 ++I;
4194 }
4195 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4196 return L.first > R.first;
4197 });
4198 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4199 // Build type kmp_routine_entry_t (if not built yet).
4200 emitKmpRoutineEntryT(KmpInt32Ty);
4201 // Build type kmp_task_t (if not built yet).
4202 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4203 if (SavedKmpTaskloopTQTy.isNull()) {
4204 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4205 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4206 }
4207 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4208 } else {
4209 assert((D.getDirectiveKind() == OMPD_task ||
4210 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4211 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4212 "Expected taskloop, task or target directive");
4213 if (SavedKmpTaskTQTy.isNull()) {
4214 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4215 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4216 }
4217 KmpTaskTQTy = SavedKmpTaskTQTy;
4218 }
4219 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4220 // Build particular struct kmp_task_t for the given task.
4221 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4222 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4223 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4224 QualType KmpTaskTWithPrivatesPtrQTy =
4225 C.getPointerType(KmpTaskTWithPrivatesQTy);
4226 unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
4227 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4228 llvm::Type *KmpTaskTWithPrivatesPtrTy =
4229 KmpTaskTWithPrivatesTy->getPointerTo(DefaultAS);
4230 llvm::Value *KmpTaskTWithPrivatesTySize =
4231 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4232 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4233
4234 // Emit initial values for private copies (if any).
4235 llvm::Value *TaskPrivatesMap = nullptr;
4236 llvm::Type *TaskPrivatesMapTy =
4237 std::next(TaskFunction->arg_begin(), 3)->getType();
4238 if (!Privates.empty()) {
4239 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4240 TaskPrivatesMap = emitTaskPrivateMappingFunction(
4241 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4242 FI->getType(), Privates);
4243 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4244 TaskPrivatesMap, TaskPrivatesMapTy);
4245 } else {
4246 TaskPrivatesMap = llvm::ConstantPointerNull::get(
4247 cast<llvm::PointerType>(TaskPrivatesMapTy));
4248 }
4249 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4250 // kmp_task_t *tt);
4251 llvm::Function *TaskEntry = emitProxyTaskFunction(
4252 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4253 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4254 TaskPrivatesMap);
4255
4256 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4257 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4258 // kmp_routine_entry_t *task_entry);
4259 // Task flags. Format is taken from
4260 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
4261 // description of kmp_tasking_flags struct.
4262 enum {
4263 TiedFlag = 0x1,
4264 FinalFlag = 0x2,
4265 DestructorsFlag = 0x8,
4266 PriorityFlag = 0x20,
4267 DetachableFlag = 0x40,
4268 };
4269 unsigned Flags = Data.Tied ? TiedFlag : 0;
4270 bool NeedsCleanup = false;
4271 if (!Privates.empty()) {
4272 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4273 if (NeedsCleanup)
4274 Flags = Flags | DestructorsFlag;
4275 }
4276 if (Data.Priority.getInt())
4277 Flags = Flags | PriorityFlag;
4278 if (D.hasClausesOfKind<OMPDetachClause>())
4279 Flags = Flags | DetachableFlag;
4280 llvm::Value *TaskFlags =
4281 Data.Final.getPointer()
4282 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4283 CGF.Builder.getInt32(FinalFlag),
4284 CGF.Builder.getInt32(/*C=*/0))
4285 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4286 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4287 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4288 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4289 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4290 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4291 TaskEntry, KmpRoutineEntryPtrTy)};
4292 llvm::Value *NewTask;
4293 if (D.hasClausesOfKind<OMPNowaitClause>()) {
4294 // Check if we have any device clause associated with the directive.
4295 const Expr *Device = nullptr;
4296 if (auto *C = D.getSingleClause<OMPDeviceClause>())
4297 Device = C->getDevice();
4298 // Emit device ID if any otherwise use default value.
4299 llvm::Value *DeviceID;
4300 if (Device)
4301 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4302 CGF.Int64Ty, /*isSigned=*/true);
4303 else
4304 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4305 AllocArgs.push_back(DeviceID);
4306 NewTask = CGF.EmitRuntimeCall(
4307 OMPBuilder.getOrCreateRuntimeFunction(
4308 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4309 AllocArgs);
4310 } else {
4311 NewTask =
4312 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4313 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4314 AllocArgs);
4315 }
4316 // Emit detach clause initialization.
4317 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4318 // task_descriptor);
4319 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4320 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4321 LValue EvtLVal = CGF.EmitLValue(Evt);
4322
4323 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4324 // int gtid, kmp_task_t *task);
4325 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4326 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4327 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4328 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4329 OMPBuilder.getOrCreateRuntimeFunction(
4330 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4331 {Loc, Tid, NewTask});
4332 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4333 Evt->getExprLoc());
4334 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4335 }
4336 // Process affinity clauses.
4337 if (D.hasClausesOfKind<OMPAffinityClause>()) {
4338 // Process list of affinity data.
4339 ASTContext &C = CGM.getContext();
4340 Address AffinitiesArray = Address::invalid();
4341 // Calculate number of elements to form the array of affinity data.
4342 llvm::Value *NumOfElements = nullptr;
4343 unsigned NumAffinities = 0;
4344 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4345 if (const Expr *Modifier = C->getModifier()) {
4346 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4347 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4348 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4349 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4350 NumOfElements =
4351 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4352 }
4353 } else {
4354 NumAffinities += C->varlist_size();
4355 }
4356 }
4357 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4358 // Fields ids in kmp_task_affinity_info record.
4359 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4360
4361 QualType KmpTaskAffinityInfoArrayTy;
4362 if (NumOfElements) {
4363 NumOfElements = CGF.Builder.CreateNUWAdd(
4364 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4365 OpaqueValueExpr OVE(
4366 Loc,
4367 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4368 VK_RValue);
4369 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4370 RValue::get(NumOfElements));
4371 KmpTaskAffinityInfoArrayTy =
4372 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4373 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4374 // Properly emit variable-sized array.
4375 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4376 ImplicitParamDecl::Other);
4377 CGF.EmitVarDecl(*PD);
4378 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4379 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4380 /*isSigned=*/false);
4381 } else {
4382 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4383 KmpTaskAffinityInfoTy,
4384 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4385 ArrayType::Normal, /*IndexTypeQuals=*/0);
4386 AffinitiesArray =
4387 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4388 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4389 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4390 /*isSigned=*/false);
4391 }
4392
4393 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4394 // Fill array by elements without iterators.
4395 unsigned Pos = 0;
4396 bool HasIterator = false;
4397 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4398 if (C->getModifier()) {
4399 HasIterator = true;
4400 continue;
4401 }
4402 for (const Expr *E : C->varlists()) {
4403 llvm::Value *Addr;
4404 llvm::Value *Size;
4405 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4406 LValue Base =
4407 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4408 KmpTaskAffinityInfoTy);
4409 // affs[i].base_addr = &<Affinities[i].second>;
4410 LValue BaseAddrLVal = CGF.EmitLValueForField(
4411 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4412 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4413 BaseAddrLVal);
4414 // affs[i].len = sizeof(<Affinities[i].second>);
4415 LValue LenLVal = CGF.EmitLValueForField(
4416 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4417 CGF.EmitStoreOfScalar(Size, LenLVal);
4418 ++Pos;
4419 }
4420 }
4421 LValue PosLVal;
4422 if (HasIterator) {
4423 PosLVal = CGF.MakeAddrLValue(
4424 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4425 C.getSizeType());
4426 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4427 }
4428 // Process elements with iterators.
4429 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4430 const Expr *Modifier = C->getModifier();
4431 if (!Modifier)
4432 continue;
4433 OMPIteratorGeneratorScope IteratorScope(
4434 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4435 for (const Expr *E : C->varlists()) {
4436 llvm::Value *Addr;
4437 llvm::Value *Size;
4438 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4439 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4440 LValue Base = CGF.MakeAddrLValue(
4441 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4442 AffinitiesArray.getAlignment()),
4443 KmpTaskAffinityInfoTy);
4444 // affs[i].base_addr = &<Affinities[i].second>;
4445 LValue BaseAddrLVal = CGF.EmitLValueForField(
4446 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4447 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4448 BaseAddrLVal);
4449 // affs[i].len = sizeof(<Affinities[i].second>);
4450 LValue LenLVal = CGF.EmitLValueForField(
4451 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4452 CGF.EmitStoreOfScalar(Size, LenLVal);
4453 Idx = CGF.Builder.CreateNUWAdd(
4454 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4455 CGF.EmitStoreOfScalar(Idx, PosLVal);
4456 }
4457 }
4458 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4459 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4460 // naffins, kmp_task_affinity_info_t *affin_list);
4461 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4462 llvm::Value *GTid = getThreadID(CGF, Loc);
4463 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4464 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4465 // FIXME: Emit the function and ignore its result for now unless the
4466 // runtime function is properly implemented.
4467 (void)CGF.EmitRuntimeCall(
4468 OMPBuilder.getOrCreateRuntimeFunction(
4469 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4470 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4471 }
4472 llvm::Value *NewTaskNewTaskTTy =
4473 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4474 NewTask, KmpTaskTWithPrivatesPtrTy);
4475 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4476 KmpTaskTWithPrivatesQTy);
4477 LValue TDBase =
4478 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4479 // Fill the data in the resulting kmp_task_t record.
4480 // Copy shareds if there are any.
4481 Address KmpTaskSharedsPtr = Address::invalid();
4482 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4483 KmpTaskSharedsPtr =
4484 Address(CGF.EmitLoadOfScalar(
4485 CGF.EmitLValueForField(
4486 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4487 KmpTaskTShareds)),
4488 Loc),
4489 CGM.getNaturalTypeAlignment(SharedsTy));
4490 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4491 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4492 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4493 }
4494 // Emit initial values for private copies (if any).
4495 TaskResultTy Result;
4496 if (!Privates.empty()) {
4497 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4498 SharedsTy, SharedsPtrTy, Data, Privates,
4499 /*ForDup=*/false);
4500 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4501 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4502 Result.TaskDupFn = emitTaskDupFunction(
4503 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4504 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4505 /*WithLastIter=*/!Data.LastprivateVars.empty());
4506 }
4507 }
4508 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4509 enum { Priority = 0, Destructors = 1 };
4510 // Provide pointer to function with destructors for privates.
4511 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4512 const RecordDecl *KmpCmplrdataUD =
4513 (*FI)->getType()->getAsUnionType()->getDecl();
4514 if (NeedsCleanup) {
4515 llvm::Value *DestructorFn = emitDestructorsFunction(
4516 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4517 KmpTaskTWithPrivatesQTy);
4518 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4519 LValue DestructorsLV = CGF.EmitLValueForField(
4520 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4521 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4522 DestructorFn, KmpRoutineEntryPtrTy),
4523 DestructorsLV);
4524 }
4525 // Set priority.
4526 if (Data.Priority.getInt()) {
4527 LValue Data2LV = CGF.EmitLValueForField(
4528 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4529 LValue PriorityLV = CGF.EmitLValueForField(
4530 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4531 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4532 }
4533 Result.NewTask = NewTask;
4534 Result.TaskEntry = TaskEntry;
4535 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4536 Result.TDBase = TDBase;
4537 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4538 return Result;
4539 }
4540
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these numeric values are presumably the flag encoding the
/// OpenMP runtime expects in kmp_depend_info.flags (in=0x1, out/inout=0x3,
/// mutexinoutset=0x4) — they must stay in sync with libomp; confirm against
/// the runtime's kmp.h before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Must match the field order built in getDependTypes():
/// { intptr_t base_addr; size_t len; flags_t flags; }.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4551
4552 /// Translates internal dependency kind into the runtime kind.
translateDependencyKind(OpenMPDependClauseKind K)4553 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4554 RTLDependenceKindTy DepKind;
4555 switch (K) {
4556 case OMPC_DEPEND_in:
4557 DepKind = DepIn;
4558 break;
4559 // Out and InOut dependencies must use the same code.
4560 case OMPC_DEPEND_out:
4561 case OMPC_DEPEND_inout:
4562 DepKind = DepInOut;
4563 break;
4564 case OMPC_DEPEND_mutexinoutset:
4565 DepKind = DepMutexInOutSet;
4566 break;
4567 case OMPC_DEPEND_source:
4568 case OMPC_DEPEND_sink:
4569 case OMPC_DEPEND_depobj:
4570 case OMPC_DEPEND_unknown:
4571 llvm_unreachable("Unknown task dependence type");
4572 }
4573 return DepKind;
4574 }
4575
4576 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
getDependTypes(ASTContext & C,QualType & KmpDependInfoTy,QualType & FlagsTy)4577 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4578 QualType &FlagsTy) {
4579 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4580 if (KmpDependInfoTy.isNull()) {
4581 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4582 KmpDependInfoRD->startDefinition();
4583 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4584 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4585 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4586 KmpDependInfoRD->completeDefinition();
4587 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4588 }
4589 }
4590
/// Returns the runtime element count of a depobj's dependency array and an
/// lvalue addressing its first kmp_depend_info entry.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Make sure struct kmp_depend_info is built.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable stores a void* to the array; load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret the loaded pointer as kmp_depend_info*.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The slot just before the visible array (index -1) is a header element
  // whose base_addr field holds the element count (written by
  // emitDepobjDependClause).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4619
/// Fills kmp_depend_info entries for every expression in \p Data into
/// \p DependenciesArray, starting at position \p Pos.
/// \p Pos is either a compile-time counter (unsigned*) when the clause has no
/// iterator modifier, or an in-memory counter (LValue*) when the number of
/// entries written is only known at run time.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If there is an iterator modifier, this scope emits the loop nest so the
  // body below executes once per point of the iteration space.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Dynamic position: load the counter and index with it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter - statically or in memory.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4678
/// Computes, at run time, the number of kmp_depend_info elements contributed
/// by each depobj operand of a depobj-kind depend clause; returns one size
/// value per dependence expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // With an iterator modifier the body below is emitted inside the iterator
    // loop nest, so each per-expression total accumulates over iterations.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the void* stored in the depobj variable and view it as
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the base_addr field of the header slot at
      // index -1 (written by emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary so the running total
      // survives across iterator-loop iterations.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated totals back outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4736
/// Copies the kmp_depend_info entries of every depobj operand in \p Data into
/// \p DependenciesArray at the runtime position \p PosLVal, advancing the
/// position by the number of entries copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of one kmp_depend_info record, for the memcpy below.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // With an iterator modifier the copy below runs inside the iterator loop
    // nest.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the void* stored in the depobj variable and view it as
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count is stored in the base_addr field of the header slot at
      // index -1 (written by emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4797
emitDependClause(CodeGenFunction & CGF,ArrayRef<OMPTaskDataTy::DependData> Dependencies,SourceLocation Loc)4798 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4799 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4800 SourceLocation Loc) {
4801 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4802 return D.DepExprs.empty();
4803 }))
4804 return std::make_pair(nullptr, Address::invalid());
4805 // Process list of dependencies.
4806 ASTContext &C = CGM.getContext();
4807 Address DependenciesArray = Address::invalid();
4808 llvm::Value *NumOfElements = nullptr;
4809 unsigned NumDependencies = std::accumulate(
4810 Dependencies.begin(), Dependencies.end(), 0,
4811 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4812 return D.DepKind == OMPC_DEPEND_depobj
4813 ? V
4814 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4815 });
4816 QualType FlagsTy;
4817 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4818 bool HasDepobjDeps = false;
4819 bool HasRegularWithIterators = false;
4820 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4821 llvm::Value *NumOfRegularWithIterators =
4822 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4823 // Calculate number of depobj dependecies and regular deps with the iterators.
4824 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4825 if (D.DepKind == OMPC_DEPEND_depobj) {
4826 SmallVector<llvm::Value *, 4> Sizes =
4827 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4828 for (llvm::Value *Size : Sizes) {
4829 NumOfDepobjElements =
4830 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4831 }
4832 HasDepobjDeps = true;
4833 continue;
4834 }
4835 // Include number of iterations, if any.
4836 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4837 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4838 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4839 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4840 NumOfRegularWithIterators =
4841 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4842 }
4843 HasRegularWithIterators = true;
4844 continue;
4845 }
4846 }
4847
4848 QualType KmpDependInfoArrayTy;
4849 if (HasDepobjDeps || HasRegularWithIterators) {
4850 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4851 /*isSigned=*/false);
4852 if (HasDepobjDeps) {
4853 NumOfElements =
4854 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4855 }
4856 if (HasRegularWithIterators) {
4857 NumOfElements =
4858 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4859 }
4860 OpaqueValueExpr OVE(Loc,
4861 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4862 VK_RValue);
4863 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4864 RValue::get(NumOfElements));
4865 KmpDependInfoArrayTy =
4866 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4867 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4868 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4869 // Properly emit variable-sized array.
4870 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4871 ImplicitParamDecl::Other);
4872 CGF.EmitVarDecl(*PD);
4873 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4874 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4875 /*isSigned=*/false);
4876 } else {
4877 KmpDependInfoArrayTy = C.getConstantArrayType(
4878 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4879 ArrayType::Normal, /*IndexTypeQuals=*/0);
4880 DependenciesArray =
4881 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4882 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4883 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4884 /*isSigned=*/false);
4885 }
4886 unsigned Pos = 0;
4887 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4888 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4889 Dependencies[I].IteratorExpr)
4890 continue;
4891 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4892 DependenciesArray);
4893 }
4894 // Copy regular dependecies with iterators.
4895 LValue PosLVal = CGF.MakeAddrLValue(
4896 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4897 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4898 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4899 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4900 !Dependencies[I].IteratorExpr)
4901 continue;
4902 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4903 DependenciesArray);
4904 }
4905 // Copy final depobj arrays without iterators.
4906 if (HasDepobjDeps) {
4907 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4908 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4909 continue;
4910 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4911 DependenciesArray);
4912 }
4913 }
4914 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4915 DependenciesArray, CGF.VoidPtrTy);
4916 return std::make_pair(NumOfElements, DependenciesArray);
4917 }
4918
/// Allocates (via __kmpc_alloc) and fills the dependency array for a depobj
/// directive. One extra leading header element stores the number of
/// dependencies; the returned address points just past that header.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the element count is the product of the
    // iterator trip counts, known only at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1 header slot) * aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the real entries starting at index 1 (past the header); with an
  // iterator modifier the position must be tracked in memory.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real element, skipping the header slot.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5001
emitDestroyClause(CodeGenFunction & CGF,LValue DepobjLVal,SourceLocation Loc)5002 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5003 SourceLocation Loc) {
5004 ASTContext &C = CGM.getContext();
5005 QualType FlagsTy;
5006 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5007 LValue Base = CGF.EmitLoadOfPointerLValue(
5008 DepobjLVal.getAddress(CGF),
5009 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5010 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5011 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5012 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5013 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5014 Addr.getPointer(),
5015 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5016 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5017 CGF.VoidPtrTy);
5018 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5019 // Use default allocator.
5020 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5021 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5022
5023 // _kmpc_free(gtid, addr, nullptr);
5024 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5025 CGM.getModule(), OMPRTL___kmpc_free),
5026 Args);
5027 }
5028
/// Lowers the update clause of the depobj directive: rewrites the flags field
/// of every kmp_depend_info entry in the depobj array to \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Get the runtime element count and the address of the first entry.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body executes before the exit test, so this presumes
  // the array holds at least one element — confirm against the depobj
  // emission path.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: Begin on loop entry, the advanced
  // pointer on the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5074
// Emits a task creation + dispatch for '#pragma omp task'. If an if-clause is
// present, the "then" path enqueues the task through the runtime
// (__kmpc_omp_task[_with_deps]) and the "else" path executes it immediately
// and undeferred (__kmpc_omp_task_begin_if0 / _complete_if0 around a direct
// call of the task entry).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object (privates, shareds, etc.).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only populated (and read by ThenCodeGen) when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "then" branch: enqueue the task via the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks reset part_id to 0 before each dispatch.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Only populated (and read by ElseCodeGen) when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "else" branch: execute the task immediately in this thread (if(false)
  // semantics).
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No if-clause: always take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5192
// Emits a '#pragma omp taskloop' dispatch: initializes the task object,
// stores the loop bounds/stride and reduction data into it, and calls
// __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The if-clause becomes an integer argument; absent clause means if(1).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of the task descriptor from the loop
  // directive's bound/stride variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kinds understood by __kmpc_taskloop's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: getInt() distinguishes num_tasks from grainsize clauses.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value, zero-extended to 64 bits; 0 when absent.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup callback (or null if the task needs no duplication hook).
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5278
5279 /// Emit reduction operation for each element of array (required for
5280 /// array sections) LHS op = RHS.
5281 /// \param Type Type of array.
5282 /// \param LHSVar Variable on the left side of the reduction operation
5283 /// (references element of array in original variable).
5284 /// \param RHSVar Variable on the right side of the reduction operation
5285 /// (references element of array in original variable).
5286 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5287 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Parallel PHIs walk the source (RHS) and destination (LHS) arrays in
  // lock-step.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap the LHS/RHS variables to the current elements so the
  // combiner expression operates on them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5358
5359 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5360 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5361 /// UDR combiner function.
emitReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp)5362 static void emitReductionCombiner(CodeGenFunction &CGF,
5363 const Expr *ReductionOp) {
5364 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5365 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5366 if (const auto *DRE =
5367 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5368 if (const auto *DRD =
5369 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5370 std::pair<llvm::Function *, llvm::Function *> Reduction =
5371 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5372 RValue Func = RValue::get(Reduction.first);
5373 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5374 CGF.EmitIgnoredExpr(ReductionOp);
5375 return;
5376 }
5377 CGF.EmitIgnoredExpr(ReductionOp);
5378 }
5379
emitReductionFunction(SourceLocation Loc,llvm::Type * ArgsType,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps)5380 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5381 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5382 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5383 ArrayRef<const Expr *> ReductionOps) {
5384 ASTContext &C = CGM.getContext();
5385
5386 // void reduction_func(void *LHSArg, void *RHSArg);
5387 FunctionArgList Args;
5388 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5389 ImplicitParamDecl::Other);
5390 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5391 ImplicitParamDecl::Other);
5392 Args.push_back(&LHSArg);
5393 Args.push_back(&RHSArg);
5394 const auto &CGFI =
5395 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5396 std::string Name = getName({"omp", "reduction", "reduction_func"});
5397 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5398 llvm::GlobalValue::InternalLinkage, Name,
5399 &CGM.getModule());
5400 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5401 Fn->setDoesNotRecurse();
5402 CodeGenFunction CGF(CGM);
5403 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5404
5405 // Dst = (void*[n])(LHSArg);
5406 // Src = (void*[n])(RHSArg);
5407 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5408 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5409 ArgsType), CGF.getPointerAlign());
5410 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5411 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5412 ArgsType), CGF.getPointerAlign());
5413
5414 // ...
5415 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5416 // ...
5417 CodeGenFunction::OMPPrivateScope Scope(CGF);
5418 auto IPriv = Privates.begin();
5419 unsigned Idx = 0;
5420 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5421 const auto *RHSVar =
5422 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5423 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5424 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5425 });
5426 const auto *LHSVar =
5427 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5428 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5429 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5430 });
5431 QualType PrivTy = (*IPriv)->getType();
5432 if (PrivTy->isVariablyModifiedType()) {
5433 // Get array size and emit VLA type.
5434 ++Idx;
5435 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5436 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5437 const VariableArrayType *VLA =
5438 CGF.getContext().getAsVariableArrayType(PrivTy);
5439 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5440 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5441 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5442 CGF.EmitVariablyModifiedType(PrivTy);
5443 }
5444 }
5445 Scope.Privatize();
5446 IPriv = Privates.begin();
5447 auto ILHS = LHSExprs.begin();
5448 auto IRHS = RHSExprs.begin();
5449 for (const Expr *E : ReductionOps) {
5450 if ((*IPriv)->getType()->isArrayType()) {
5451 // Emit reduction for array section.
5452 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5453 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5454 EmitOMPAggregateReduction(
5455 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5456 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5457 emitReductionCombiner(CGF, E);
5458 });
5459 } else {
5460 // Emit reduction for array subscript or single variable.
5461 emitReductionCombiner(CGF, E);
5462 }
5463 ++IPriv;
5464 ++ILHS;
5465 ++IRHS;
5466 }
5467 Scope.ForceCleanup();
5468 CGF.FinishFunction();
5469 return Fn;
5470 }
5471
emitSingleReductionCombiner(CodeGenFunction & CGF,const Expr * ReductionOp,const Expr * PrivateRef,const DeclRefExpr * LHS,const DeclRefExpr * RHS)5472 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5473 const Expr *ReductionOp,
5474 const Expr *PrivateRef,
5475 const DeclRefExpr *LHS,
5476 const DeclRefExpr *RHS) {
5477 if (PrivateRef->getType()->isArrayType()) {
5478 // Emit reduction for array section.
5479 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5480 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5481 EmitOMPAggregateReduction(
5482 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5483 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5484 emitReductionCombiner(CGF, ReductionOp);
5485 });
5486 } else {
5487 // Emit reduction for array subscript or single variable.
5488 emitReductionCombiner(CGF, ReductionOp);
5489 }
5490 }
5491
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //   *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Serial context: apply each combiner directly, no runtime calls needed.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA element counts travel in the extra slot, encoded as a pointer.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(DefaultAS), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Non-atomic path: this thread holds the reduction lock, so plain
    // combiners are safe.
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Recognize the 'x = <update expr>' shape so it can be emitted as a
      // simple atomic update instead of a critical section.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback update generator: stage the loaded value in a
                // temporary mapped to the LHS variable, then re-evaluate the
                // update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5799
5800 /// Generates unique name for artificial threadprivate variables.
5801 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
generateUniqueName(CodeGenModule & CGM,StringRef Prefix,const Expr * Ref)5802 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5803 const Expr *Ref) {
5804 SmallString<256> Buffer;
5805 llvm::raw_svector_ostream Out(Buffer);
5806 const clang::DeclRefExpr *DE;
5807 const VarDecl *D = ::getBaseDecl(Ref, DE);
5808 if (!D)
5809 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5810 D = D->getCanonicalDecl();
5811 std::string Name = CGM.getOpenMPRuntime().getName(
5812 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5813 Out << Prefix << Name << "_"
5814 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5815 return std::string(Out.str());
5816 }
5817
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void*: the private copy to
  // initialize and the original item.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points to the private reduction item to be initialized.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No declare-reduction initializer; pass a null original pointer.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5886
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // Placeholder variables from the reduction clause; remapped below onto the
  // two function arguments so ReductionOp can be emitted unchanged.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5964
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed for items without cleanups; the caller emits a
  // null pointer for the runtime in that case.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points to the private reduction item to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6013
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to register when there are no reduction items (or no insert
  // point).
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer tells the runtime no finalization is required.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 marks items whose objects are created lazily by the
      // runtime (VLAs/array sections, see DelayedCreation above).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6142
emitTaskReductionFini(CodeGenFunction & CGF,SourceLocation Loc,bool IsWorksharingReduction)6143 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6144 SourceLocation Loc,
6145 bool IsWorksharingReduction) {
6146 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6147 // is_ws, int num, void *data);
6148 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6149 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6150 CGM.IntTy, /*isSigned=*/true);
6151 llvm::Value *Args[] = {IdentTLoc, GTid,
6152 llvm::ConstantInt::get(CGM.IntTy,
6153 IsWorksharingReduction ? 1 : 0,
6154 /*isSigned=*/true)};
6155 (void)CGF.EmitRuntimeCall(
6156 OMPBuilder.getOrCreateRuntimeFunction(
6157 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6158 Args);
6159 }
6160
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant (Sizes.second != nullptr). The generated init/comb/fini
  // helper functions read the size back from this variable, because the
  // runtime has no way to pass VLA/array-section sizes to them.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6177
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)6178 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6179 SourceLocation Loc,
6180 llvm::Value *ReductionsPtr,
6181 LValue SharedLVal) {
6182 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6183 // *d);
6184 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6185 CGM.IntTy,
6186 /*isSigned=*/true),
6187 ReductionsPtr,
6188 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6189 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6190 return Address(
6191 CGF.EmitRuntimeCall(
6192 OMPBuilder.getOrCreateRuntimeFunction(
6193 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6194 Args),
6195 SharedLVal.getAlignment());
6196 }
6197
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc)6198 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6199 SourceLocation Loc) {
6200 if (!CGF.HaveInsertPoint())
6201 return;
6202
6203 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6204 OMPBuilder.CreateTaskwait(CGF.Builder);
6205 } else {
6206 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6207 // global_tid);
6208 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6209 // Ignore return result until untied tasks are supported.
6210 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6211 CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6212 Args);
6213 }
6214
6215 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6216 Region->emitUntiedSwitch(CGF);
6217 }
6218
emitInlinedDirective(CodeGenFunction & CGF,OpenMPDirectiveKind InnerKind,const RegionCodeGenTy & CodeGen,bool HasCancel)6219 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6220 OpenMPDirectiveKind InnerKind,
6221 const RegionCodeGenTy &CodeGen,
6222 bool HasCancel) {
6223 if (!CGF.HaveInsertPoint())
6224 return;
6225 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6226 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6227 }
6228
namespace {
/// Cancellation kinds passed as the kmp_int32 cncl_kind argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // 'parallel' cancellation region
  CancelLoop = 2,      // 'for' (worksharing loop) cancellation region
  CancelSections = 3,  // 'sections' cancellation region
  CancelTaskgroup = 4  // 'taskgroup' cancellation region
};
} // anonymous namespace
6238
getCancellationKind(OpenMPDirectiveKind CancelRegion)6239 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6240 RTCancelKind CancelKind = CancelNoreq;
6241 if (CancelRegion == OMPD_parallel)
6242 CancelKind = CancelParallel;
6243 else if (CancelRegion == OMPD_for)
6244 CancelKind = CancelLoop;
6245 else if (CancelRegion == OMPD_sections)
6246 CancelKind = CancelSections;
6247 else {
6248 assert(CancelRegion == OMPD_taskgroup);
6249 CancelKind = CancelTaskgroup;
6250 }
6251 return CancelKind;
6252 }
6253
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branch through the cleanup stack to the
      // construct's cancellation destination.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6290
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted for the 'then' side of an optional if clause; runs
    // unconditionally when no if clause is present.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branch through the cleanup stack to the
      // construct's cancellation destination.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the runtime call with the if clause condition; the else branch
      // emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6333
6334 namespace {
6335 /// Cleanup action for uses_allocators support.
6336 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6337 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6338
6339 public:
OMPUsesAllocatorsActionTy(ArrayRef<std::pair<const Expr *,const Expr * >> Allocators)6340 OMPUsesAllocatorsActionTy(
6341 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6342 : Allocators(Allocators) {}
Enter(CodeGenFunction & CGF)6343 void Enter(CodeGenFunction &CGF) override {
6344 if (!CGF.HaveInsertPoint())
6345 return;
6346 for (const auto &AllocatorData : Allocators) {
6347 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6348 CGF, AllocatorData.first, AllocatorData.second);
6349 }
6350 }
Exit(CodeGenFunction & CGF)6351 void Exit(CodeGenFunction &CGF) override {
6352 if (!CGF.HaveInsertPoint())
6353 return;
6354 for (const auto &AllocatorData : Allocators) {
6355 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6356 AllocatorData.first);
6357 }
6358 }
6359 };
6360 } // namespace
6361
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)6362 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6363 const OMPExecutableDirective &D, StringRef ParentName,
6364 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6365 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6366 assert(!ParentName.empty() && "Invalid target region parent name!");
6367 HasEmittedTargetRegion = true;
6368 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6369 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6370 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6371 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6372 if (!D.AllocatorTraits)
6373 continue;
6374 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6375 }
6376 }
6377 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6378 CodeGen.setAction(UsesAllocatorAction);
6379 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6380 IsOffloadEntry, CodeGen);
6381 }
6382
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the extent of the constant array of traits.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  // Build call: __kmpc_init_allocator(gtid, memspace, ntraits, traits).
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator. Emit the allocator variable itself first, then store
  // the converted handle into it (emitUsesAllocatorsFini reads it back).
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6417
emitUsesAllocatorsFini(CodeGenFunction & CGF,const Expr * Allocator)6418 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6419 const Expr *Allocator) {
6420 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6421 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6422 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6423 llvm::Value *AllocatorVal =
6424 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6425 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6426 CGF.getContext().VoidPtrTy,
6427 Allocator->getExprLoc());
6428 (void)CGF.EmitRuntimeCall(
6429 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6430 OMPRTL___kmpc_destroy_allocator),
6431 {ThreadId, AllocatorVal});
6432 }
6433
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the region body as a captured-statement function named
  // EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep that, and could therefore inline the host
  // function if proven worthwhile during optimization. On the other hand, if
  // emitting code for the device, the ID has to be the function address so
  // that it can be retrieved from the offloading entry and launched by the
  // runtime library. We also mark the outlined function to have external
  // linkage in case we are emitting code for the device, because these
  // functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a uniquely named constant byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6500
6501 /// Checks if the expression is constant or does not have non-trivial function
6502 /// calls.
isTrivial(ASTContext & Ctx,const Expr * E)6503 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6504 // We can skip constant expressions.
6505 // We can skip expressions with trivial calls or simple expressions.
6506 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6507 !E->hasNonTrivialCall(Ctx)) &&
6508 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6509 }
6510
/// Looks through compound statements and returns the single statement \p Body
/// effectively contains, skipping statements that generate no observable
/// code. Returns nullptr if more than one non-skippable child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial (constant or side-effect-free) expressions can be skipped.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations of these kinds emit no runtime code.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Constexpr variables, and trivially-typed or reference
              // variables with no initializer (or a trivial one), can also
              // be ignored.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6555
/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', examine the single directive nested inside the
    // captured statement (if any) to decide the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested teams' num_teams clause in the context of the
          // enclosing captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: 0 lets the runtime choose.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd region executes as a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: evaluate the num_teams clause on the
    // directive itself, if present.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    // No num_teams clause: 0 lets the runtime choose.
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Directives without a teams construct execute with exactly one team.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based execution directives
  // and must not reach this function (see the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6688
/// Compute the thread count implied by a parallel (or simd) directive nested
/// directly inside the captured statement \p CS. Returns nullptr when no such
/// directive is the single child, 1 for simd-only regions, and otherwise the
/// num_threads value clamped by \p DefaultThreadLimitVal (or 0 meaning
/// "runtime default") combined with any applicable 'if' clause.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an unmodified 'if' clause or one with the 'parallel' name
        // modifier applies to the parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false: the region runs with a single thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              // Emit any pre-init declarations the condition depends on.
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          // Emit any pre-init declarations the num_threads expression needs.
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the inherited thread limit, if any.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd-only region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No single nested directive: fall back to the inherited limit or the
  // runtime default (0).
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6780
/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // A directly nested parallel directive determines the count by itself.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate the nested thread_limit clause in the context of the
        // enclosing captured statement.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          // Emit pre-init declarations the thread_limit expression needs.
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        // Descend into the teams region to find its single nested directive.
        CS = Dir->getInnermostCapturedStmt();
        // NOTE(review): this 'Child' shadows the outer one; only the nested
        // child is consulted from here on.
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested plain 'distribute' may itself contain a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an unmodified 'if' clause or one with the 'parallel' name
      // modifier applies to the parallel region.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: the region runs with a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Use min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only target regions execute with a single thread.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based execution directives
  // and must not reach this function (see the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7005
7006 namespace {
7007 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7008
7009 // Utility to handle information from clauses associated with a given
7010 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7011 // It provides a convenient interface to obtain the information and generate
7012 // code for that information.
7013 class MappableExprsHandler {
7014 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These are passed to the offloading runtime for each mapped
  /// entry.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. Mask covers bits 48-63; see getFlagMemberOffset().
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7055
7056 /// Get the offset of the OMP_MAP_MEMBER_OF field.
getFlagMemberOffset()7057 static unsigned getFlagMemberOffset() {
7058 unsigned Offset = 0;
7059 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7060 Remain = Remain >> 1)
7061 Offset++;
7062 return Offset;
7063 }
7064
7065 /// Class that associates information with a base pointer to be passed to the
7066 /// runtime library.
7067 class BasePointerInfo {
7068 /// The base pointer.
7069 llvm::Value *Ptr = nullptr;
7070 /// The base declaration that refers to this device pointer, or null if
7071 /// there is none.
7072 const ValueDecl *DevPtrDecl = nullptr;
7073
7074 public:
BasePointerInfo(llvm::Value * Ptr,const ValueDecl * DevPtrDecl=nullptr)7075 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7076 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
operator *() const7077 llvm::Value *operator*() const { return Ptr; }
getDevicePtrDecl() const7078 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
setDevicePtrDecl(const ValueDecl * D)7079 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7080 };
7081
7082 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7083 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7084 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7085
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the struct itself.
    Address Base = Address::invalid();
  };
7097
7098 private:
  /// Bundles one mappable-expression component list with the map type,
  /// modifiers, and flags needed to generate its map entries.
  struct MapInfo {
    // Component list describing the mapped expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type from the clause (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers (e.g. always, close) attached to the clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime must return a device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True if the map was generated implicitly rather than from a clause.
    bool IsImplicit = false;
    // True if the entry originates from a use_device_addr clause.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };
7118
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression for the deferred struct-member entry.
    const Expr *IE = nullptr;
    // Declaration associated with the deferred entry.
    const ValueDecl *VD = nullptr;
    // True when the entry came from use_device_addr (vs. use_device_ptr).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7131
7132 /// The target directive from where the mappable clauses were extracted. It
7133 /// is either a executable directive or a user-defined mapper directive.
7134 llvm::PointerUnion<const OMPExecutableDirective *,
7135 const OMPDeclareMapperDecl *>
7136 CurDir;
7137
7138 /// Function the directive is being generated for.
7139 CodeGenFunction &CGF;
7140
7141 /// Set of all first private variables in the current directive.
7142 /// bool data is set to true if the variable is implicitly marked as
7143 /// firstprivate, false otherwise.
7144 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7145
7146 /// Map between device pointer declarations and their expression components.
7147 /// The key value for declarations in 'this' is null.
7148 llvm::DenseMap<
7149 const ValueDecl *,
7150 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7151 DevPointersMap;
7152
  /// Compute the size in bytes that must be mapped for expression \p E,
  /// accounting for array shaping expressions, reference types, and array
  /// sections (whole, fixed-length, single-element, and lower-bound-only).
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all shaped dimensions.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb exceeding the base size: select 0 in that case.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7227
7228 /// Return the corresponding bits for a given map clause modifier. Add
7229 /// a flag marking the map as a pointer if requested. Add a flag marking the
7230 /// map as the first one of a series of maps that relate to the same map
7231 /// expression.
getMapTypeBits(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,bool IsImplicit,bool AddPtrFlag,bool AddIsTargetParamFlag) const7232 OpenMPOffloadMappingFlags getMapTypeBits(
7233 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7234 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7235 OpenMPOffloadMappingFlags Bits =
7236 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7237 switch (MapType) {
7238 case OMPC_MAP_alloc:
7239 case OMPC_MAP_release:
7240 // alloc and release is the default behavior in the runtime library, i.e.
7241 // if we don't pass any bits alloc/release that is what the runtime is
7242 // going to do. Therefore, we don't need to signal anything for these two
7243 // type modifiers.
7244 break;
7245 case OMPC_MAP_to:
7246 Bits |= OMP_MAP_TO;
7247 break;
7248 case OMPC_MAP_from:
7249 Bits |= OMP_MAP_FROM;
7250 break;
7251 case OMPC_MAP_tofrom:
7252 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7253 break;
7254 case OMPC_MAP_delete:
7255 Bits |= OMP_MAP_DELETE;
7256 break;
7257 case OMPC_MAP_unknown:
7258 llvm_unreachable("Unexpected map type!");
7259 }
7260 if (AddPtrFlag)
7261 Bits |= OMP_MAP_PTR_AND_OBJ;
7262 if (AddIsTargetParamFlag)
7263 Bits |= OMP_MAP_TARGET_PARAM;
7264 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7265 != MapModifiers.end())
7266 Bits |= OMP_MAP_ALWAYS;
7267 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7268 != MapModifiers.end())
7269 Bits |= OMP_MAP_CLOSE;
7270 return Bits;
7271 }
7272
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      // A constant-size array dimension is final unless its size is exactly 1.
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more that size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
7311
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param MapType Map kind taken from the clause (to/from/tofrom/...).
  /// \param MapModifiers Map-type modifiers (always, close, ...) on the map.
  /// \param Components Component list for one mapped expression; it is stored
  ///        from the whole expression down to its base and is therefore
  ///        traversed here in reverse (base first).
  /// \param BasePointers Output: base pointer of each emitted entry.
  /// \param Pointers Output: section pointer of each emitted entry.
  /// \param Sizes Output: size (in bytes, as i64) of each emitted entry.
  /// \param Types Output: offload mapping flags of each emitted entry.
  /// \param PartialStruct Updated with the base and the lowest/highest mapped
  ///        members when individual members of a struct are mapped.
  /// \param IsImplicit True if the mapping was not explicitly written by the
  ///        user (affects the flags produced by getMapTypeBits).
  /// \param ForDeviceAddr If true, a member entry is skipped (combined with
  ///        its pointee) even when it is not of pointer type — see the
  ///        IsMemberPointerOrAddr computation below.
  /// \param OverlappedElements Component lists of members overlapping this
  ///        one; when non-empty only the non-overlapped byte ranges of the
  ///        base element are mapped.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array access on 'this': emit the shared lvalue of the whole access.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array-shaping expression on 'this': evaluate the base as a scalar.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            // Use the runtime-provided reference for the declare target
            // variable instead of the host address.
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer reached through a unary/binary operator is a dereference,
      // not a pointer component of its own.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the section pointer (lower bound) of the entry being emitted.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB points at the last byte of the base element.
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
               Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                // Size of the gap between the current lower bound and the
                // start of the overlapped component.
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // Resume mapping just past the overlapped component.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Map the tail: from the end of the last overlapped component to
          // one past the last byte of the base element.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7775
7776 /// Return the adjusted map modifiers if the declaration a capture refers to
7777 /// appears in a first-private clause. This is expected to be used only with
7778 /// directives that start with 'target'.
7779 MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture & Cap) const7780 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7781 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7782
7783 // A first private variable captured by reference will use only the
7784 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7785 // declaration is known as first-private in this handler.
7786 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7787 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7788 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7789 return MappableExprsHandler::OMP_MAP_ALWAYS |
7790 MappableExprsHandler::OMP_MAP_TO;
7791 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7792 return MappableExprsHandler::OMP_MAP_TO |
7793 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7794 return MappableExprsHandler::OMP_MAP_PRIVATE |
7795 MappableExprsHandler::OMP_MAP_TO;
7796 }
7797 return MappableExprsHandler::OMP_MAP_TO |
7798 MappableExprsHandler::OMP_MAP_FROM;
7799 }
7800
getMemberOfFlag(unsigned Position)7801 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7802 // Rotate by getFlagMemberOffset() bits.
7803 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7804 << getFlagMemberOffset());
7805 }
7806
setCorrectMemberOfFlag(OpenMPOffloadMappingFlags & Flags,OpenMPOffloadMappingFlags MemberOfFlag)7807 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7808 OpenMPOffloadMappingFlags MemberOfFlag) {
7809 // If the entry is PTR_AND_OBJ but has not been marked with the special
7810 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7811 // marked as MEMBER_OF.
7812 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7813 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7814 return;
7815
7816 // Reset the placeholder value to prepare the flag for the assignment of the
7817 // proper MEMBER_OF value.
7818 Flags &= ~OMP_MAP_MEMBER_OF;
7819 Flags |= MemberOfFlag;
7820 }
7821
  /// Flatten the layout of \a RD into \a Layout as a sequence of FieldDecls
  /// ordered by ascending LLVM field index, recursing into non-empty base
  /// classes. \a AsBase selects the base-subobject LLVM type of \a RD
  /// instead of its complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // Slot table indexed by LLVM field number; each occupied slot holds
    // either a base class or a field declaration.
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases (no storage of their own in the layout).
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the first occupant if the slot was already claimed above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected slots in index order, recursing into base classes so
    // the result is a flat list of FieldDecls.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7881
7882 public:
MappableExprsHandler(const OMPExecutableDirective & Dir,CodeGenFunction & CGF)7883 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7884 : CurDir(&Dir), CGF(CGF) {
7885 // Extract firstprivate clause information.
7886 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7887 for (const auto *D : C->varlists())
7888 FirstPrivateDecls.try_emplace(
7889 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7890 // Extract implicit firstprivates from uses_allocators clauses.
7891 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7892 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7893 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7894 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7895 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7896 /*Implicit=*/true);
7897 else if (const auto *VD = dyn_cast<VarDecl>(
7898 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7899 ->getDecl()))
7900 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7901 }
7902 }
7903 // Extract device pointer clause information.
7904 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7905 for (auto L : C->component_lists())
7906 DevPointersMap[L.first].push_back(L.second);
7907 }
7908
  /// Constructor for the declare mapper directive. Unlike the
  /// executable-directive constructor, no clause information is collected
  /// here; only the current directive and CodeGenFunction are recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7912
7913 /// Generate code for the combined entry if we have a partially mapped struct
7914 /// and take care of the mapping flags of the arguments corresponding to
7915 /// individual struct members.
emitCombinedEntry(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,MapFlagsArrayTy & CurTypes,const StructRangeInfoTy & PartialStruct) const7916 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7917 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7918 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7919 const StructRangeInfoTy &PartialStruct) const {
7920 // Base is the base of the struct
7921 BasePointers.push_back(PartialStruct.Base.getPointer());
7922 // Pointer is the address of the lowest element
7923 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7924 Pointers.push_back(LB);
7925 // Size is (addr of {highest+1} element) - (addr of lowest element)
7926 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7927 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7928 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7929 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7930 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7931 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7932 /*isSigned=*/false);
7933 Sizes.push_back(Size);
7934 // Map type is always TARGET_PARAM
7935 Types.push_back(OMP_MAP_TARGET_PARAM);
7936 // Remove TARGET_PARAM flag from the first element
7937 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7938
7939 // All other current entries will be MEMBER_OF the combined entry
7940 // (except for PTR_AND_OBJ entries which do not have a placeholder value
7941 // 0xFFFF in the MEMBER_OF field).
7942 OpenMPOffloadMappingFlags MemberOfFlag =
7943 getMemberOfFlag(BasePointers.size() - 1);
7944 for (auto &M : CurTypes)
7945 setCorrectMemberOfFlag(M, MemberOfFlag);
7946 }
7947
7948 /// Generate all the base pointers, section pointers, sizes and map
7949 /// types for the extracted mappable expressions. Also, for each item that
7950 /// relates with a device pointer, a pair of the relevant declaration and
7951 /// index where it occurs is appended to the device pointers info array.
generateAllInfo(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const7952 void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7953 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7954 MapFlagsArrayTy &Types) const {
7955 // We have to process the component lists that relate with the same
7956 // declaration in a single chunk so that we can generate the map flags
7957 // correctly. Therefore, we organize all lists in a map.
7958 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7959
7960 // Helper function to fill the information map for the different supported
7961 // clauses.
7962 auto &&InfoGen =
7963 [&Info](const ValueDecl *D,
7964 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7965 OpenMPMapClauseKind MapType,
7966 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7967 bool ReturnDevicePointer, bool IsImplicit,
7968 bool ForDeviceAddr = false) {
7969 const ValueDecl *VD =
7970 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7971 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7972 IsImplicit, ForDeviceAddr);
7973 };
7974
7975 assert(CurDir.is<const OMPExecutableDirective *>() &&
7976 "Expect a executable directive");
7977 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7978 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7979 for (const auto L : C->component_lists()) {
7980 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7981 /*ReturnDevicePointer=*/false, C->isImplicit());
7982 }
7983 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7984 for (const auto L : C->component_lists()) {
7985 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7986 /*ReturnDevicePointer=*/false, C->isImplicit());
7987 }
7988 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7989 for (const auto L : C->component_lists()) {
7990 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7991 /*ReturnDevicePointer=*/false, C->isImplicit());
7992 }
7993
7994 // Look at the use_device_ptr clause information and mark the existing map
7995 // entries as such. If there is no map information for an entry in the
7996 // use_device_ptr list, we create one with map type 'alloc' and zero size
7997 // section. It is the user fault if that was not mapped before. If there is
7998 // no map information and the pointer is a struct member, then we defer the
7999 // emission of that entry until the whole struct has been processed.
8000 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8001 DeferredInfo;
8002
8003 for (const auto *C :
8004 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8005 for (const auto L : C->component_lists()) {
8006 assert(!L.second.empty() && "Not expecting empty list of components!");
8007 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8008 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8009 const Expr *IE = L.second.back().getAssociatedExpression();
8010 // If the first component is a member expression, we have to look into
8011 // 'this', which maps to null in the map of map information. Otherwise
8012 // look directly for the information.
8013 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8014
8015 // We potentially have map information for this declaration already.
8016 // Look for the first set of components that refer to it.
8017 if (It != Info.end()) {
8018 auto CI = std::find_if(
8019 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8020 return MI.Components.back().getAssociatedDeclaration() == VD;
8021 });
8022 // If we found a map entry, signal that the pointer has to be returned
8023 // and move on to the next declaration.
8024 if (CI != It->second.end()) {
8025 CI->ReturnDevicePointer = true;
8026 continue;
8027 }
8028 }
8029
8030 // We didn't find any match in our map information - generate a zero
8031 // size array section - if the pointer is a struct member we defer this
8032 // action until the whole struct has been processed.
8033 if (isa<MemberExpr>(IE)) {
8034 // Insert the pointer into Info to be processed by
8035 // generateInfoForComponentList. Because it is a member pointer
8036 // without a pointee, no entry will be generated for it, therefore
8037 // we need to generate one after the whole struct has been processed.
8038 // Nonetheless, generateInfoForComponentList must be called to take
8039 // the pointer into account for the calculation of the range of the
8040 // partial struct.
8041 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8042 /*ReturnDevicePointer=*/false, C->isImplicit());
8043 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8044 } else {
8045 llvm::Value *Ptr =
8046 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8047 BasePointers.emplace_back(Ptr, VD);
8048 Pointers.push_back(Ptr);
8049 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8050 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8051 }
8052 }
8053 }
8054
8055 // Look at the use_device_addr clause information and mark the existing map
8056 // entries as such. If there is no map information for an entry in the
8057 // use_device_addr list, we create one with map type 'alloc' and zero size
8058 // section. It is the user fault if that was not mapped before. If there is
8059 // no map information and the pointer is a struct member, then we defer the
8060 // emission of that entry until the whole struct has been processed.
8061 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8062 for (const auto *C :
8063 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8064 for (const auto L : C->component_lists()) {
8065 assert(!L.second.empty() && "Not expecting empty list of components!");
8066 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8067 if (!Processed.insert(VD).second)
8068 continue;
8069 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8070 const Expr *IE = L.second.back().getAssociatedExpression();
8071 // If the first component is a member expression, we have to look into
8072 // 'this', which maps to null in the map of map information. Otherwise
8073 // look directly for the information.
8074 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8075
8076 // We potentially have map information for this declaration already.
8077 // Look for the first set of components that refer to it.
8078 if (It != Info.end()) {
8079 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8080 return MI.Components.back().getAssociatedDeclaration() == VD;
8081 });
8082 // If we found a map entry, signal that the pointer has to be returned
8083 // and move on to the next declaration.
8084 if (CI != It->second.end()) {
8085 CI->ReturnDevicePointer = true;
8086 continue;
8087 }
8088 }
8089
8090 // We didn't find any match in our map information - generate a zero
8091 // size array section - if the pointer is a struct member we defer this
8092 // action until the whole struct has been processed.
8093 if (isa<MemberExpr>(IE)) {
8094 // Insert the pointer into Info to be processed by
8095 // generateInfoForComponentList. Because it is a member pointer
8096 // without a pointee, no entry will be generated for it, therefore
8097 // we need to generate one after the whole struct has been processed.
8098 // Nonetheless, generateInfoForComponentList must be called to take
8099 // the pointer into account for the calculation of the range of the
8100 // partial struct.
8101 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8102 /*ReturnDevicePointer=*/false, C->isImplicit(),
8103 /*ForDeviceAddr=*/true);
8104 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8105 } else {
8106 llvm::Value *Ptr;
8107 if (IE->isGLValue())
8108 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8109 else
8110 Ptr = CGF.EmitScalarExpr(IE);
8111 BasePointers.emplace_back(Ptr, VD);
8112 Pointers.push_back(Ptr);
8113 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8114 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8115 }
8116 }
8117 }
8118
8119 for (const auto &M : Info) {
8120 // We need to know when we generate information for the first component
8121 // associated with a capture, because the mapping flags depend on it.
8122 bool IsFirstComponentList = true;
8123
8124 // Temporary versions of arrays
8125 MapBaseValuesArrayTy CurBasePointers;
8126 MapValuesArrayTy CurPointers;
8127 MapValuesArrayTy CurSizes;
8128 MapFlagsArrayTy CurTypes;
8129 StructRangeInfoTy PartialStruct;
8130
8131 for (const MapInfo &L : M.second) {
8132 assert(!L.Components.empty() &&
8133 "Not expecting declaration with no component lists.");
8134
8135 // Remember the current base pointer index.
8136 unsigned CurrentBasePointersIdx = CurBasePointers.size();
8137 generateInfoForComponentList(
8138 L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8139 CurPointers, CurSizes, CurTypes, PartialStruct,
8140 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8141
8142 // If this entry relates with a device pointer, set the relevant
8143 // declaration and add the 'return pointer' flag.
8144 if (L.ReturnDevicePointer) {
8145 assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8146 "Unexpected number of mapped base pointers.");
8147
8148 const ValueDecl *RelevantVD =
8149 L.Components.back().getAssociatedDeclaration();
8150 assert(RelevantVD &&
8151 "No relevant declaration related with device pointer??");
8152
8153 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8154 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8155 }
8156 IsFirstComponentList = false;
8157 }
8158
8159 // Append any pending zero-length pointers which are struct members and
8160 // used with use_device_ptr or use_device_addr.
8161 auto CI = DeferredInfo.find(M.first);
8162 if (CI != DeferredInfo.end()) {
8163 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8164 llvm::Value *BasePtr;
8165 llvm::Value *Ptr;
8166 if (L.ForDeviceAddr) {
8167 if (L.IE->isGLValue())
8168 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8169 else
8170 Ptr = this->CGF.EmitScalarExpr(L.IE);
8171 BasePtr = Ptr;
8172 // Entry is RETURN_PARAM. Also, set the placeholder value
8173 // MEMBER_OF=FFFF so that the entry is later updated with the
8174 // correct value of MEMBER_OF.
8175 CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8176 } else {
8177 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8178 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8179 L.IE->getExprLoc());
8180 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8181 // value MEMBER_OF=FFFF so that the entry is later updated with the
8182 // correct value of MEMBER_OF.
8183 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8184 OMP_MAP_MEMBER_OF);
8185 }
8186 CurBasePointers.emplace_back(BasePtr, L.VD);
8187 CurPointers.push_back(Ptr);
8188 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8189 }
8190 }
8191
8192 // If there is an entry in PartialStruct it means we have a struct with
8193 // individual members mapped. Emit an extra combined entry.
8194 if (PartialStruct.Base.isValid())
8195 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8196 PartialStruct);
8197
8198 // We need to append the results of this capture to what we already have.
8199 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8200 Pointers.append(CurPointers.begin(), CurPointers.end());
8201 Sizes.append(CurSizes.begin(), CurSizes.end());
8202 Types.append(CurTypes.begin(), CurTypes.end());
8203 }
8204 }
8205
8206 /// Generate all the base pointers, section pointers, sizes and map types for
8207 /// the extracted map clauses of user-defined mapper.
/// \param BasePointers [out] Appended with one base-pointer entry per map.
/// \param Pointers     [out] Appended with the matching section pointers.
/// \param Sizes        [out] Appended with the matching size values.
/// \param Types        [out] Appended with the matching map-type flag words.
/// Expects CurDir to hold an OMPDeclareMapperDecl; every clause list entry of
/// a declare mapper directive is a map clause (see the cast below).
generateAllInfoForMapper(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const8208 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8209 MapValuesArrayTy &Pointers,
8210 MapValuesArrayTy &Sizes,
8211 MapFlagsArrayTy &Types) const {
8212 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8213 "Expect a declare mapper directive");
8214 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8215 // We have to process the component lists that relate with the same
8216 // declaration in a single chunk so that we can generate the map flags
8217 // correctly. Therefore, we organize all lists in a map.
// Keyed by the canonical declaration; a null key stands for 'this', matching
// the convention used by the other info-generation methods in this class.
8218 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8219
8220 // Helper function to fill the information map for the different supported
8221 // clauses.
8222 auto &&InfoGen = [&Info](
8223 const ValueDecl *D,
8224 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8225 OpenMPMapClauseKind MapType,
8226 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8227 bool ReturnDevicePointer, bool IsImplicit) {
8228 const ValueDecl *VD =
8229 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8230 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8231 IsImplicit);
8232 };
8233
8234 for (const auto *C : CurMapperDir->clauselists()) {
8235 const auto *MC = cast<OMPMapClause>(C);
8236 for (const auto L : MC->component_lists()) {
8237 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8238 /*ReturnDevicePointer=*/false, MC->isImplicit());
8239 }
8240 }
8241
8242 for (const auto &M : Info) {
8243 // We need to know when we generate information for the first component
8244 // associated with a capture, because the mapping flags depend on it.
8245 bool IsFirstComponentList = true;
8246
8247 // Temporary versions of arrays
8248 MapBaseValuesArrayTy CurBasePointers;
8249 MapValuesArrayTy CurPointers;
8250 MapValuesArrayTy CurSizes;
8251 MapFlagsArrayTy CurTypes;
8252 StructRangeInfoTy PartialStruct;
8253
8254 for (const MapInfo &L : M.second) {
8255 assert(!L.Components.empty() &&
8256 "Not expecting declaration with no component lists.");
// InfoGen above never sets ForDeviceAddr, so L.ForDeviceAddr carries the
// MapInfo default here.
8257 generateInfoForComponentList(
8258 L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8259 CurPointers, CurSizes, CurTypes, PartialStruct,
8260 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr)
8261 IsFirstComponentList = false;
8262 }
8263
8264 // If there is an entry in PartialStruct it means we have a struct with
8265 // individual members mapped. Emit an extra combined entry.
8266 if (PartialStruct.Base.isValid())
8267 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8268 PartialStruct);
8269
8270 // We need to append the results of this capture to what we already have.
8271 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8272 Pointers.append(CurPointers.begin(), CurPointers.end());
8273 Sizes.append(CurSizes.begin(), CurSizes.end());
8274 Types.append(CurTypes.begin(), CurTypes.end());
8275 }
8276 }
8277
8278 /// Emit capture info for lambdas for variables captured by reference.
/// \param VD  The captured declaration; only acted on when its canonical
///        non-reference type is a lambda record.
/// \param Arg The address of the lambda object as captured for the region.
/// \param LambdaPointers [out] Records, for each emitted capture-field
///        address, the lambda object's own address, so that
///        adjustMemberOfForLambdaCaptures can later patch the MEMBER_OF bits
///        of these entries to point at the lambda's map entry.
/// Each emitted entry is tagged PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT.
generateInfoForLambdaCaptures(const ValueDecl * VD,llvm::Value * Arg,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers) const8279 void generateInfoForLambdaCaptures(
8280 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8281 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8282 MapFlagsArrayTy &Types,
8283 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8284 const auto *RD = VD->getType()
8285 .getCanonicalType()
8286 .getNonReferenceType()
8287 ->getAsCXXRecordDecl();
8288 if (!RD || !RD->isLambda())
8289 return;
8290 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8291 LValue VDLVal = CGF.MakeAddrLValue(
8292 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8293 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8294 FieldDecl *ThisCapture = nullptr;
8295 RD->getCaptureFields(Captures, ThisCapture);
// Map a captured 'this': base is the capture field, pointee is the pointer
// value stored in it, size is that of a void pointer.
8296 if (ThisCapture) {
8297 LValue ThisLVal =
8298 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8299 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8300 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8301 VDLVal.getPointer(CGF));
8302 BasePointers.push_back(ThisLVal.getPointer(CGF));
8303 Pointers.push_back(ThisLValVal.getPointer(CGF));
8304 Sizes.push_back(
8305 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8306 CGF.Int64Ty, /*isSigned=*/true));
8307 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8308 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8309 }
// Map each variable capture. By-reference captures map the referenced
// storage; non-reference captures are considered only when they have pointer
// type (the pointer value itself is what gets passed).
8310 for (const LambdaCapture &LC : RD->captures()) {
8311 if (!LC.capturesVariable())
8312 continue;
8313 const VarDecl *VD = LC.getCapturedVar();
8314 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8315 continue;
8316 auto It = Captures.find(VD);
8317 assert(It != Captures.end() && "Found lambda capture without field.");
8318 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8319 if (LC.getCaptureKind() == LCK_ByRef) {
8320 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8321 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8322 VDLVal.getPointer(CGF));
8323 BasePointers.push_back(VarLVal.getPointer(CGF));
8324 Pointers.push_back(VarLValVal.getPointer(CGF));
8325 Sizes.push_back(CGF.Builder.CreateIntCast(
8326 CGF.getTypeSize(
8327 VD->getType().getCanonicalType().getNonReferenceType()),
8328 CGF.Int64Ty, /*isSigned=*/true));
8329 } else {
// By-copy pointer capture: pass the loaded pointer value with a zero size.
8330 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8331 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8332 VDLVal.getPointer(CGF));
8333 BasePointers.push_back(VarLVal.getPointer(CGF));
8334 Pointers.push_back(VarRVal.getScalarVal());
8335 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8336 }
8337 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8338 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8339 }
8340 }
8341
8342 /// Set correct indices for lambdas captures.
/// For every entry whose flags are exactly the implicit-lambda-capture
/// combination emitted by generateInfoForLambdaCaptures, locate the earlier
/// entry that maps the lambda object itself (the entry whose pointer equals
/// the lambda base address recorded in LambdaPointers) and encode that
/// entry's index into this entry's MEMBER_OF bits.
adjustMemberOfForLambdaCaptures(const llvm::DenseMap<llvm::Value *,llvm::Value * > & LambdaPointers,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapFlagsArrayTy & Types) const8343 void adjustMemberOfForLambdaCaptures(
8344 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8345 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8346 MapFlagsArrayTy &Types) const {
8347 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8348 // Set correct member_of idx for all implicit lambda captures.
// Exact flag comparison: only entries produced by the lambda-capture path
// carry precisely this combination, so nothing else is touched.
8349 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8350 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8351 continue;
8352 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8353 assert(BasePtr && "Unable to find base lambda address.");
8354 int TgtIdx = -1;
// Search backwards from I for the nearest entry that maps the lambda
// object itself.
8355 for (unsigned J = I; J > 0; --J) {
8356 unsigned Idx = J - 1;
8357 if (Pointers[Idx] != BasePtr)
8358 continue;
8359 TgtIdx = Idx;
8360 break;
8361 }
8362 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8363 // All other current entries will be MEMBER_OF the combined entry
8364 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8365 // 0xFFFF in the MEMBER_OF field).
8366 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8367 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8368 }
8369 }
8370
8371 /// Generate the base pointers, section pointers, sizes and map types
8372 /// associated to a given capture.
/// \param Cap Capture being processed (variable-array-type captures are not
///        allowed here, see the assert).
/// \param Arg The value the capture was materialized to.
/// \param PartialStruct [out] Range info accumulated when individual struct
///        members are mapped; the caller is responsible for emitting the
///        combined entry from it.
/// Processing order: is_device_ptr captures short-circuit to a single
/// LITERAL|TARGET_PARAM entry; otherwise all map-clause component lists for
/// the declaration are collected, overlapping lists are detected and emitted
/// first (passing along their overlap information), then the remaining
/// non-overlapping lists are emitted.
generateInfoForCapture(const CapturedStmt::Capture * Cap,llvm::Value * Arg,MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types,StructRangeInfoTy & PartialStruct) const8373 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8374 llvm::Value *Arg,
8375 MapBaseValuesArrayTy &BasePointers,
8376 MapValuesArrayTy &Pointers,
8377 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8378 StructRangeInfoTy &PartialStruct) const {
8379 assert(!Cap->capturesVariableArrayType() &&
8380 "Not expecting to generate map info for a variable array type!");
8381
// The captured declaration; null for a 'this' capture, which is the key
8382 // We need to know when we generating information for the first component
8383 const ValueDecl *VD = Cap->capturesThis()
8384 ? nullptr
8385 : Cap->getCapturedVar()->getCanonicalDecl();
8386
8387 // If this declaration appears in a is_device_ptr clause we just have to
8388 // pass the pointer by value. If it is a reference to a declaration, we just
8389 // pass its value.
8390 if (DevPointersMap.count(VD)) {
8391 BasePointers.emplace_back(Arg, VD);
8392 Pointers.push_back(Arg);
8393 Sizes.push_back(
8394 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8395 CGF.Int64Ty, /*isSigned=*/true));
8396 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8397 return;
8398 }
8399
// Collect every map-clause component list that names this declaration,
// together with the clause's map type, modifiers and implicitness.
8400 using MapData =
8401 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8402 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8403 SmallVector<MapData, 4> DeclComponentLists;
8404 assert(CurDir.is<const OMPExecutableDirective *>() &&
8405 "Expect a executable directive");
8406 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8407 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8408 for (const auto L : C->decl_component_lists(VD)) {
8409 assert(L.first == VD &&
8410 "We got information for the wrong declaration??");
8411 assert(!L.second.empty() &&
8412 "Not expecting declaration with no component lists.");
8413 DeclComponentLists.emplace_back(L.second, C->getMapType(),
8414 C->getMapTypeModifiers(),
8415 C->isImplicit());
8416 }
8417 }
8418
8419 // Find overlapping elements (including the offset from the base element).
// Two lists overlap when walking both from the back (the base declaration —
// cf. the back() lookups used throughout this class) matches component by
// component until one list is exhausted; the shorter list is the base, the
// longer one is recorded as an overlapped sub-component of it.
8420 llvm::SmallDenseMap<
8421 const MapData *,
8422 llvm::SmallVector<
8423 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8424 4>
8425 OverlappedData;
8426 size_t Count = 0;
8427 for (const MapData &L : DeclComponentLists) {
8428 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8429 OpenMPMapClauseKind MapType;
8430 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8431 bool IsImplicit;
8432 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8433 ++Count;
8434 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8435 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
// NOTE(review): this tie deliberately reuses MapType/MapModifiers/
// IsImplicit as scratch; only the component lists are inspected below.
8436 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8437 auto CI = Components.rbegin();
8438 auto CE = Components.rend();
8439 auto SI = Components1.rbegin();
8440 auto SE = Components1.rend();
8441 for (; CI != CE && SI != SE; ++CI, ++SI) {
8442 if (CI->getAssociatedExpression()->getStmtClass() !=
8443 SI->getAssociatedExpression()->getStmtClass())
8444 break;
8445 // Are we dealing with different variables/fields?
8446 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8447 break;
8448 }
8449 // Found overlapping if, at least for one component, reached the head of
8450 // the components list.
8451 if (CI == CE || SI == SE) {
8452 assert((CI != CE || SI != SE) &&
8453 "Unexpected full match of the mapping components.");
8454 const MapData &BaseData = CI == CE ? L : L1;
8455 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8456 SI == SE ? Components : Components1;
8457 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8458 OverlappedElements.getSecond().push_back(SubData);
8459 }
8460 }
8461 }
8462 // Sort the overlapped elements for each item.
// Layout holds the record's fields in layout order so that overlapped lists
// can be ordered by field position when they live in different base classes.
8463 llvm::SmallVector<const FieldDecl *, 4> Layout;
8464 if (!OverlappedData.empty()) {
8465 if (const auto *CRD =
8466 VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8467 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8468 else {
8469 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8470 Layout.append(RD->field_begin(), RD->field_end());
8471 }
8472 }
8473 for (auto &Pair : OverlappedData) {
8474 llvm::sort(
8475 Pair.getSecond(),
8476 [&Layout](
8477 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8478 OMPClauseMappableExprCommon::MappableExprComponentListRef
8479 Second) {
// Compare the two component lists from the base outward; ordering is
// by the first differing field's layout position.
8480 auto CI = First.rbegin();
8481 auto CE = First.rend();
8482 auto SI = Second.rbegin();
8483 auto SE = Second.rend();
8484 for (; CI != CE && SI != SE; ++CI, ++SI) {
8485 if (CI->getAssociatedExpression()->getStmtClass() !=
8486 SI->getAssociatedExpression()->getStmtClass())
8487 break;
8488 // Are we dealing with different variables/fields?
8489 if (CI->getAssociatedDeclaration() !=
8490 SI->getAssociatedDeclaration())
8491 break;
8492 }
8493
8494 // Lists contain the same elements.
8495 if (CI == CE && SI == SE)
8496 return false;
8497
8498 // List with less elements is less than list with more elements.
8499 if (CI == CE || SI == SE)
8500 return CI == CE;
8501
8502 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8503 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8504 if (FD1->getParent() == FD2->getParent())
8505 return FD1->getFieldIndex() < FD2->getFieldIndex();
// Fields from different parents: whichever appears first in the plain
// layout order is "less".
8506 const auto It =
8507 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8508 return FD == FD1 || FD == FD2;
8509 });
8510 return *It == FD1;
8511 });
8512 }
8513
8514 // Associated with a capture, because the mapping flags depend on it.
8515 // Go through all of the elements with the overlapped elements.
8516 for (const auto &Pair : OverlappedData) {
8517 const MapData &L = *Pair.getFirst();
8518 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8519 OpenMPMapClauseKind MapType;
8520 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8521 bool IsImplicit;
8522 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8523 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8524 OverlappedComponents = Pair.getSecond();
8525 bool IsFirstComponentList = true;
8526 generateInfoForComponentList(
8527 MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
8528 Types, PartialStruct, IsFirstComponentList, IsImplicit,
8529 /*ForDeviceAddr=*/false, OverlappedComponents);
8530 }
8531 // Go through other elements without overlapped elements.
// Only the very first emitted list may be "first"; if overlapped entries
// were emitted above, none of these are.
8532 bool IsFirstComponentList = OverlappedData.empty();
8533 for (const MapData &L : DeclComponentLists) {
8534 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8535 OpenMPMapClauseKind MapType;
8536 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8537 bool IsImplicit;
8538 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8539 auto It = OverlappedData.find(&L);
8540 if (It == OverlappedData.end())
8541 generateInfoForComponentList(MapType, MapModifiers, Components,
8542 BasePointers, Pointers, Sizes, Types,
8543 PartialStruct, IsFirstComponentList,
8544 IsImplicit);
8545 IsFirstComponentList = false;
8546 }
8547 }
8548
8549 /// Generate the base pointers, section pointers, sizes and map types
8550 /// associated with the declare target link variables.
/// Walks the directive's map clauses for list items that are global
/// variables (not captures) carrying the declare-target 'link' map type, and
/// emits their component lists. Entries are suppressed entirely when the
/// runtime requires unified shared memory, or when the variable is not a
/// 'link' declare-target variable.
generateInfoForDeclareTargetLink(MapBaseValuesArrayTy & BasePointers,MapValuesArrayTy & Pointers,MapValuesArrayTy & Sizes,MapFlagsArrayTy & Types) const8551 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8552 MapValuesArrayTy &Pointers,
8553 MapValuesArrayTy &Sizes,
8554 MapFlagsArrayTy &Types) const {
8555 assert(CurDir.is<const OMPExecutableDirective *>() &&
8556 "Expect a executable directive");
8557 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8558 // Map other list items in the map clause which are not captured variables
8559 // but "declare target link" global variables.
8560 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8561 for (const auto L : C->component_lists()) {
8562 if (!L.first)
8563 continue;
8564 const auto *VD = dyn_cast<VarDecl>(L.first);
8565 if (!VD)
8566 continue;
8567 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8568 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8569 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8570 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8571 continue;
// Each list is emitted standalone; link variables never contribute to a
// partial struct, which the assert below enforces.
8572 StructRangeInfoTy PartialStruct;
8573 generateInfoForComponentList(
8574 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8575 Pointers, Sizes, Types, PartialStruct,
8576 /*IsFirstComponentList=*/true, C->isImplicit());
8577 assert(!PartialStruct.Base.isValid() &&
8578 "No partial structs for declare target link expected.");
8579 }
8580 }
8581 }
8582
8583 /// Generate the default map information for a given capture \a CI,
8584 /// record field declaration \a RI and captured value \a CV.
/// Produces exactly one entry, tagged TARGET_PARAM (and IMPLICIT unless a
/// firstprivate clause marked it explicit):
/// - 'this' capture: map the pointee with TO|FROM.
/// - by-copy capture: non-pointers are passed as LITERAL with their size;
///   pointers get a zero-size entry with no extra flags.
/// - by-reference capture: map the referenced storage; the default map type
///   comes from getMapModifiersForPrivateClauses. Constant firstprivate
///   captures are redirected to a registered global copy instead.
generateDefaultMapInfo(const CapturedStmt::Capture & CI,const FieldDecl & RI,llvm::Value * CV,MapBaseValuesArrayTy & CurBasePointers,MapValuesArrayTy & CurPointers,MapValuesArrayTy & CurSizes,MapFlagsArrayTy & CurMapTypes) const8585 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8586 const FieldDecl &RI, llvm::Value *CV,
8587 MapBaseValuesArrayTy &CurBasePointers,
8588 MapValuesArrayTy &CurPointers,
8589 MapValuesArrayTy &CurSizes,
8590 MapFlagsArrayTy &CurMapTypes) const {
8591 bool IsImplicit = true;
8592 // Do the default mapping.
8593 if (CI.capturesThis()) {
8594 CurBasePointers.push_back(CV);
8595 CurPointers.push_back(CV);
// Size is that of the pointee, not the pointer: the object 'this' points
// to is what gets mapped.
8596 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8597 CurSizes.push_back(
8598 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8599 CGF.Int64Ty, /*isSigned=*/true));
8600 // Default map type.
8601 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8602 } else if (CI.capturesVariableByCopy()) {
8603 CurBasePointers.push_back(CV);
8604 CurPointers.push_back(CV);
8605 if (!RI.getType()->isAnyPointerType()) {
8606 // We have to signal to the runtime captures passed by value that are
8607 // not pointers.
8608 CurMapTypes.push_back(OMP_MAP_LITERAL);
8609 CurSizes.push_back(CGF.Builder.CreateIntCast(
8610 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8611 } else {
8612 // Pointers are implicitly mapped with a zero size and no flags
8613 // (other than first map that is added for all implicit maps).
8614 CurMapTypes.push_back(OMP_MAP_NONE);
8615 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8616 }
// A firstprivate clause may override the implicitness of this capture.
8617 const VarDecl *VD = CI.getCapturedVar();
8618 auto I = FirstPrivateDecls.find(VD);
8619 if (I != FirstPrivateDecls.end())
8620 IsImplicit = I->getSecond();
8621 } else {
8622 assert(CI.capturesVariable() && "Expected captured reference.");
8623 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8624 QualType ElementType = PtrTy->getPointeeType();
8625 CurSizes.push_back(CGF.Builder.CreateIntCast(
8626 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8627 // The default map type for a scalar/complex type is 'to' because by
8628 // default the value doesn't have to be retrieved. For an aggregate
8629 // type, the default is 'tofrom'.
8630 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8631 const VarDecl *VD = CI.getCapturedVar();
8632 auto I = FirstPrivateDecls.find(VD);
// Constant firstprivate captures are copied once into a registered global
// and mapped from there instead of from the original variable.
8633 if (I != FirstPrivateDecls.end() &&
8634 VD->getType().isConstant(CGF.getContext())) {
8635 llvm::Constant *Addr =
8636 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8637 // Copy the value of the original variable to the new global copy.
8638 CGF.Builder.CreateMemCpy(
8639 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8640 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8641 CurSizes.back(), /*IsVolatile=*/false);
8642 // Use new global variable as the base pointers.
8643 CurBasePointers.push_back(Addr);
8644 CurPointers.push_back(Addr);
8645 } else {
8646 CurBasePointers.push_back(CV);
// Firstprivate pointer captured by reference: pass the loaded pointer
// value rather than the address of the reference.
8647 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8648 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8649 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8650 AlignmentSource::Decl));
8651 CurPointers.push_back(PtrAddr.getPointer());
8652 } else {
8653 CurPointers.push_back(CV);
8654 }
8655 }
8656 if (I != FirstPrivateDecls.end())
8657 IsImplicit = I->getSecond();
8658 }
8659 // Every default map produces a single argument which is a target parameter.
8660 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8661
8662 // Add flag stating this is an implicit map.
8663 if (IsImplicit)
8664 CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8665 }
8666 };
8667 } // anonymous namespace
8668
8669 /// Emit the arrays used to pass the captures and map information to the
8670 /// offloading runtime library. If there is no map or capture information,
8671 /// return nullptr by reference.
8672 static void
emitOffloadingArrays(CodeGenFunction & CGF,MappableExprsHandler::MapBaseValuesArrayTy & BasePointers,MappableExprsHandler::MapValuesArrayTy & Pointers,MappableExprsHandler::MapValuesArrayTy & Sizes,MappableExprsHandler::MapFlagsArrayTy & MapTypes,CGOpenMPRuntime::TargetDataInfo & Info)8673 emitOffloadingArrays(CodeGenFunction &CGF,
8674 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8675 MappableExprsHandler::MapValuesArrayTy &Pointers,
8676 MappableExprsHandler::MapValuesArrayTy &Sizes,
8677 MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8678 CGOpenMPRuntime::TargetDataInfo &Info) {
8679 CodeGenModule &CGM = CGF.CGM;
8680 ASTContext &Ctx = CGF.getContext();
8681
8682 // Reset the array information.
8683 Info.clearArrayInfo();
8684 Info.NumberOfPtrs = BasePointers.size();
8685
8686 if (Info.NumberOfPtrs) {
8687 // Detect if we have any capture size requiring runtime evaluation of the
8688 // size so that a constant array could be eventually used.
8689 bool hasRuntimeEvaluationCaptureSize = false;
8690 for (llvm::Value *S : Sizes)
8691 if (!isa<llvm::Constant>(S)) {
8692 hasRuntimeEvaluationCaptureSize = true;
8693 break;
8694 }
8695
8696 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8697 QualType PointerArrayType = Ctx.getConstantArrayType(
8698 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8699 /*IndexTypeQuals=*/0);
8700
8701 Info.BasePointersArray =
8702 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8703 Info.PointersArray =
8704 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8705
8706 // If we don't have any VLA types or other types that require runtime
8707 // evaluation, we can use a constant array for the map sizes, otherwise we
8708 // need to fill up the arrays as we do for the pointers.
8709 QualType Int64Ty =
8710 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8711 if (hasRuntimeEvaluationCaptureSize) {
8712 QualType SizeArrayType = Ctx.getConstantArrayType(
8713 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8714 /*IndexTypeQuals=*/0);
8715 Info.SizesArray =
8716 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8717 } else {
8718 // We expect all the sizes to be constant, so we collect them to create
8719 // a constant array.
8720 SmallVector<llvm::Constant *, 16> ConstSizes;
8721 for (llvm::Value *S : Sizes)
8722 ConstSizes.push_back(cast<llvm::Constant>(S));
8723
8724 auto *SizesArrayInit = llvm::ConstantArray::get(
8725 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8726 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8727 auto *SizesArrayGbl = new llvm::GlobalVariable(
8728 CGM.getModule(), SizesArrayInit->getType(),
8729 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8730 SizesArrayInit, Name);
8731 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8732 Info.SizesArray = SizesArrayGbl;
8733 }
8734
8735 // The map types are always constant so we don't need to generate code to
8736 // fill arrays. Instead, we create an array constant.
8737 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8738 llvm::copy(MapTypes, Mapping.begin());
8739 llvm::Constant *MapTypesArrayInit =
8740 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8741 std::string MaptypesName =
8742 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8743 auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8744 CGM.getModule(), MapTypesArrayInit->getType(),
8745 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8746 MapTypesArrayInit, MaptypesName);
8747 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8748 Info.MapTypesArray = MapTypesArrayGbl;
8749
8750 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8751 llvm::Value *BPVal = *BasePointers[I];
8752 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8753 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8754 Info.BasePointersArray, 0, I);
8755 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8756 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8757 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8758 CGF.Builder.CreateStore(BPVal, BPAddr);
8759
8760 if (Info.requiresDevicePointerInfo())
8761 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8762 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8763
8764 llvm::Value *PVal = Pointers[I];
8765 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8766 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8767 Info.PointersArray, 0, I);
8768 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8769 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8770 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8771 CGF.Builder.CreateStore(PVal, PAddr);
8772
8773 if (hasRuntimeEvaluationCaptureSize) {
8774 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8775 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8776 Info.SizesArray,
8777 /*Idx0=*/0,
8778 /*Idx1=*/I);
8779 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8780 CGF.Builder.CreateStore(
8781 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8782 SAddr);
8783 }
8784 }
8785 }
8786 }
8787
8788 /// Emit the arguments to be passed to the runtime library based on the
8789 /// arrays of pointers, sizes and map types.
emitOffloadingArraysArgument(CodeGenFunction & CGF,llvm::Value * & BasePointersArrayArg,llvm::Value * & PointersArrayArg,llvm::Value * & SizesArrayArg,llvm::Value * & MapTypesArrayArg,CGOpenMPRuntime::TargetDataInfo & Info)8790 static void emitOffloadingArraysArgument(
8791 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8792 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8793 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8794 CodeGenModule &CGM = CGF.CGM;
8795 if (Info.NumberOfPtrs) {
8796 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8797 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8798 Info.BasePointersArray,
8799 /*Idx0=*/0, /*Idx1=*/0);
8800 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8801 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8802 Info.PointersArray,
8803 /*Idx0=*/0,
8804 /*Idx1=*/0);
8805 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8806 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8807 /*Idx0=*/0, /*Idx1=*/0);
8808 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8809 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8810 Info.MapTypesArray,
8811 /*Idx0=*/0,
8812 /*Idx1=*/0);
8813 } else {
8814 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8815 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8816 unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
8817 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo(DefaultAS));
8818 MapTypesArrayArg =
8819 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo(DefaultAS));
8820 }
8821 }
8822
8823 /// Check for inner distribute directive.
8824 static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext & Ctx,const OMPExecutableDirective & D)8825 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8826 const auto *CS = D.getInnermostCapturedStmt();
8827 const auto *Body =
8828 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8829 const Stmt *ChildStmt =
8830 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8831
8832 if (const auto *NestedDir =
8833 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8834 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8835 switch (D.getDirectiveKind()) {
8836 case OMPD_target:
8837 if (isOpenMPDistributeDirective(DKind))
8838 return NestedDir;
8839 if (DKind == OMPD_teams) {
8840 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8841 /*IgnoreCaptured=*/true);
8842 if (!Body)
8843 return nullptr;
8844 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8845 if (const auto *NND =
8846 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8847 DKind = NND->getDirectiveKind();
8848 if (isOpenMPDistributeDirective(DKind))
8849 return NND;
8850 }
8851 }
8852 return nullptr;
8853 case OMPD_target_teams:
8854 if (isOpenMPDistributeDirective(DKind))
8855 return NestedDir;
8856 return nullptr;
8857 case OMPD_target_parallel:
8858 case OMPD_target_simd:
8859 case OMPD_target_parallel_for:
8860 case OMPD_target_parallel_for_simd:
8861 return nullptr;
8862 case OMPD_target_teams_distribute:
8863 case OMPD_target_teams_distribute_simd:
8864 case OMPD_target_teams_distribute_parallel_for:
8865 case OMPD_target_teams_distribute_parallel_for_simd:
8866 case OMPD_parallel:
8867 case OMPD_for:
8868 case OMPD_parallel_for:
8869 case OMPD_parallel_master:
8870 case OMPD_parallel_sections:
8871 case OMPD_for_simd:
8872 case OMPD_parallel_for_simd:
8873 case OMPD_cancel:
8874 case OMPD_cancellation_point:
8875 case OMPD_ordered:
8876 case OMPD_threadprivate:
8877 case OMPD_allocate:
8878 case OMPD_task:
8879 case OMPD_simd:
8880 case OMPD_sections:
8881 case OMPD_section:
8882 case OMPD_single:
8883 case OMPD_master:
8884 case OMPD_critical:
8885 case OMPD_taskyield:
8886 case OMPD_barrier:
8887 case OMPD_taskwait:
8888 case OMPD_taskgroup:
8889 case OMPD_atomic:
8890 case OMPD_flush:
8891 case OMPD_depobj:
8892 case OMPD_scan:
8893 case OMPD_teams:
8894 case OMPD_target_data:
8895 case OMPD_target_exit_data:
8896 case OMPD_target_enter_data:
8897 case OMPD_distribute:
8898 case OMPD_distribute_simd:
8899 case OMPD_distribute_parallel_for:
8900 case OMPD_distribute_parallel_for_simd:
8901 case OMPD_teams_distribute:
8902 case OMPD_teams_distribute_simd:
8903 case OMPD_teams_distribute_parallel_for:
8904 case OMPD_teams_distribute_parallel_for_simd:
8905 case OMPD_target_update:
8906 case OMPD_declare_simd:
8907 case OMPD_declare_variant:
8908 case OMPD_begin_declare_variant:
8909 case OMPD_end_declare_variant:
8910 case OMPD_declare_target:
8911 case OMPD_end_declare_target:
8912 case OMPD_declare_reduction:
8913 case OMPD_declare_mapper:
8914 case OMPD_taskloop:
8915 case OMPD_taskloop_simd:
8916 case OMPD_master_taskloop:
8917 case OMPD_master_taskloop_simd:
8918 case OMPD_parallel_master_taskloop:
8919 case OMPD_parallel_master_taskloop_simd:
8920 case OMPD_requires:
8921 case OMPD_unknown:
8922 default:
8923 llvm_unreachable("Unexpected directive.");
8924 }
8925 }
8926
8927 return nullptr;
8928 }
8929
8930 /// Emit the user-defined mapper function. The code generation follows the
8931 /// pattern in the example below.
8932 /// \code
8933 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8934 /// void *base, void *begin,
8935 /// int64_t size, int64_t type) {
8936 /// // Allocate space for an array section first.
8937 /// if (size > 1 && !maptype.IsDelete)
8938 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8939 /// size*sizeof(Ty), clearToFrom(type));
8940 /// // Map members.
8941 /// for (unsigned i = 0; i < size; i++) {
8942 /// // For each component specified by this mapper:
8943 /// for (auto c : all_components) {
8944 /// if (c.hasMapper())
8945 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8946 /// c.arg_type);
8947 /// else
8948 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8949 /// c.arg_begin, c.arg_size, c.arg_type);
8950 /// }
8951 /// }
8952 /// // Delete the array section.
8953 /// if (size > 1 && maptype.IsDelete)
8954 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8955 /// size*sizeof(Ty), clearToFrom(type));
8956 /// }
8957 /// \endcode
emitUserDefinedMapper(const OMPDeclareMapperDecl * D,CodeGenFunction * CGF)8958 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8959 CodeGenFunction *CGF) {
8960 if (UDMMap.count(D) > 0)
8961 return;
8962 ASTContext &C = CGM.getContext();
8963 QualType Ty = D->getType();
8964 QualType PtrTy = C.getPointerType(Ty).withRestrict();
8965 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8966 auto *MapperVarDecl =
8967 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8968 SourceLocation Loc = D->getLocation();
8969 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8970
8971 // Prepare mapper function arguments and attributes.
8972 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8973 C.VoidPtrTy, ImplicitParamDecl::Other);
8974 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8975 ImplicitParamDecl::Other);
8976 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8977 C.VoidPtrTy, ImplicitParamDecl::Other);
8978 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8979 ImplicitParamDecl::Other);
8980 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8981 ImplicitParamDecl::Other);
8982 FunctionArgList Args;
8983 Args.push_back(&HandleArg);
8984 Args.push_back(&BaseArg);
8985 Args.push_back(&BeginArg);
8986 Args.push_back(&SizeArg);
8987 Args.push_back(&TypeArg);
8988 const CGFunctionInfo &FnInfo =
8989 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8990 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8991 SmallString<64> TyStr;
8992 llvm::raw_svector_ostream Out(TyStr);
8993 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8994 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8995 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8996 Name, &CGM.getModule());
8997 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8998 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8999 // Start the mapper function code generation.
9000 CodeGenFunction MapperCGF(CGM);
9001 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9002 // Compute the starting and end addreses of array elements.
9003 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9004 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9005 C.getPointerType(Int64Ty), Loc);
9006 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9007 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9008 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9009 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9010 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9011 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9012 C.getPointerType(Int64Ty), Loc);
9013 // Prepare common arguments for array initiation and deletion.
9014 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9015 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9016 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9017 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9018 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9019 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9020 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9021 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9022 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9023
9024 // Emit array initiation if this is an array section and \p MapType indicates
9025 // that memory allocation is required.
9026 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9027 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9028 ElementSize, HeadBB, /*IsInit=*/true);
9029
9030 // Emit a for loop to iterate through SizeArg of elements and map all of them.
9031
9032 // Emit the loop header block.
9033 MapperCGF.EmitBlock(HeadBB);
9034 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9035 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9036 // Evaluate whether the initial condition is satisfied.
9037 llvm::Value *IsEmpty =
9038 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9039 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9040 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9041
9042 // Emit the loop body block.
9043 MapperCGF.EmitBlock(BodyBB);
9044 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9045 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9046 PtrPHI->addIncoming(PtrBegin, EntryBB);
9047 Address PtrCurrent =
9048 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9049 .getAlignment()
9050 .alignmentOfArrayElement(ElementSize));
9051 // Privatize the declared variable of mapper to be the current array element.
9052 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9053 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9054 return MapperCGF
9055 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9056 .getAddress(MapperCGF);
9057 });
9058 (void)Scope.Privatize();
9059
9060 // Get map clause information. Fill up the arrays with all mapped variables.
9061 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9062 MappableExprsHandler::MapValuesArrayTy Pointers;
9063 MappableExprsHandler::MapValuesArrayTy Sizes;
9064 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9065 MappableExprsHandler MEHandler(*D, MapperCGF);
9066 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9067
9068 // Call the runtime API __tgt_mapper_num_components to get the number of
9069 // pre-existing components.
9070 llvm::Value *OffloadingArgs[] = {Handle};
9071 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9072 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9073 OMPRTL___tgt_mapper_num_components),
9074 OffloadingArgs);
9075 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9076 PreviousSize,
9077 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9078
9079 // Fill up the runtime mapper handle for all components.
9080 for (unsigned I = 0; I < BasePointers.size(); ++I) {
9081 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9082 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9083 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9084 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9085 llvm::Value *CurSizeArg = Sizes[I];
9086
9087 // Extract the MEMBER_OF field from the map type.
9088 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9089 MapperCGF.EmitBlock(MemberBB);
9090 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9091 llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9092 OriMapType,
9093 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9094 llvm::BasicBlock *MemberCombineBB =
9095 MapperCGF.createBasicBlock("omp.member.combine");
9096 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9097 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9098 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9099 // Add the number of pre-existing components to the MEMBER_OF field if it
9100 // is valid.
9101 MapperCGF.EmitBlock(MemberCombineBB);
9102 llvm::Value *CombinedMember =
9103 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9104 // Do nothing if it is not a member of previous components.
9105 MapperCGF.EmitBlock(TypeBB);
9106 llvm::PHINode *MemberMapType =
9107 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9108 MemberMapType->addIncoming(OriMapType, MemberBB);
9109 MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9110
9111 // Combine the map type inherited from user-defined mapper with that
9112 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9113 // bits of the \a MapType, which is the input argument of the mapper
9114 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9115 // bits of MemberMapType.
9116 // [OpenMP 5.0], 1.2.6. map-type decay.
9117 // | alloc | to | from | tofrom | release | delete
9118 // ----------------------------------------------------------
9119 // alloc | alloc | alloc | alloc | alloc | release | delete
9120 // to | alloc | to | alloc | to | release | delete
9121 // from | alloc | alloc | from | from | release | delete
9122 // tofrom | alloc | to | from | tofrom | release | delete
9123 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9124 MapType,
9125 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9126 MappableExprsHandler::OMP_MAP_FROM));
9127 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9128 llvm::BasicBlock *AllocElseBB =
9129 MapperCGF.createBasicBlock("omp.type.alloc.else");
9130 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9131 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9132 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9133 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9134 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9135 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9136 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9137 MapperCGF.EmitBlock(AllocBB);
9138 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9139 MemberMapType,
9140 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9141 MappableExprsHandler::OMP_MAP_FROM)));
9142 MapperCGF.Builder.CreateBr(EndBB);
9143 MapperCGF.EmitBlock(AllocElseBB);
9144 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9145 LeftToFrom,
9146 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9147 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9148 // In case of to, clear OMP_MAP_FROM.
9149 MapperCGF.EmitBlock(ToBB);
9150 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9151 MemberMapType,
9152 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9153 MapperCGF.Builder.CreateBr(EndBB);
9154 MapperCGF.EmitBlock(ToElseBB);
9155 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9156 LeftToFrom,
9157 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9158 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9159 // In case of from, clear OMP_MAP_TO.
9160 MapperCGF.EmitBlock(FromBB);
9161 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9162 MemberMapType,
9163 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9164 // In case of tofrom, do nothing.
9165 MapperCGF.EmitBlock(EndBB);
9166 llvm::PHINode *CurMapType =
9167 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9168 CurMapType->addIncoming(AllocMapType, AllocBB);
9169 CurMapType->addIncoming(ToMapType, ToBB);
9170 CurMapType->addIncoming(FromMapType, FromBB);
9171 CurMapType->addIncoming(MemberMapType, ToElseBB);
9172
9173 // TODO: call the corresponding mapper function if a user-defined mapper is
9174 // associated with this map clause.
9175 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9176 // data structure.
9177 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9178 CurSizeArg, CurMapType};
9179 MapperCGF.EmitRuntimeCall(
9180 OMPBuilder.getOrCreateRuntimeFunction(
9181 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9182 OffloadingArgs);
9183 }
9184
9185 // Update the pointer to point to the next element that needs to be mapped,
9186 // and check whether we have mapped all elements.
9187 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9188 PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9189 PtrPHI->addIncoming(PtrNext, BodyBB);
9190 llvm::Value *IsDone =
9191 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9192 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9193 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9194
9195 MapperCGF.EmitBlock(ExitBB);
9196 // Emit array deletion if this is an array section and \p MapType indicates
9197 // that deletion is required.
9198 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9199 ElementSize, DoneBB, /*IsInit=*/false);
9200
9201 // Emit the function exit block.
9202 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9203 MapperCGF.FinishFunction();
9204 UDMMap.try_emplace(D, Fn);
9205 if (CGF) {
9206 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9207 Decls.second.push_back(D);
9208 }
9209 }
9210
9211 /// Emit the array initialization or deletion portion for user-defined mapper
9212 /// code generation. First, it evaluates whether an array section is mapped and
9213 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9214 /// true, and \a MapType indicates to not delete this array, array
9215 /// initialization code is generated. If \a IsInit is false, and \a MapType
9216 /// indicates to not this array, array deletion code is generated.
emitUDMapperArrayInitOrDel(CodeGenFunction & MapperCGF,llvm::Value * Handle,llvm::Value * Base,llvm::Value * Begin,llvm::Value * Size,llvm::Value * MapType,CharUnits ElementSize,llvm::BasicBlock * ExitBB,bool IsInit)9217 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9218 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9219 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9220 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9221 StringRef Prefix = IsInit ? ".init" : ".del";
9222
9223 // Evaluate if this is an array section.
9224 llvm::BasicBlock *IsDeleteBB =
9225 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9226 llvm::BasicBlock *BodyBB =
9227 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9228 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9229 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9230 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9231
9232 // Evaluate if we are going to delete this section.
9233 MapperCGF.EmitBlock(IsDeleteBB);
9234 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9235 MapType,
9236 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9237 llvm::Value *DeleteCond;
9238 if (IsInit) {
9239 DeleteCond = MapperCGF.Builder.CreateIsNull(
9240 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9241 } else {
9242 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9243 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9244 }
9245 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9246
9247 MapperCGF.EmitBlock(BodyBB);
9248 // Get the array size by multiplying element size and element number (i.e., \p
9249 // Size).
9250 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9251 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9252 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9253 // memory allocation/deletion purpose only.
9254 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9255 MapType,
9256 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9257 MappableExprsHandler::OMP_MAP_FROM)));
9258 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9259 // data structure.
9260 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9261 MapperCGF.EmitRuntimeCall(
9262 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9263 OMPRTL___tgt_push_mapper_component),
9264 OffloadingArgs);
9265 }
9266
emitTargetNumIterationsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Value * DeviceID,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)9267 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9268 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9269 llvm::Value *DeviceID,
9270 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9271 const OMPLoopDirective &D)>
9272 SizeEmitter) {
9273 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9274 const OMPExecutableDirective *TD = &D;
9275 // Get nested teams distribute kind directive, if any.
9276 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9277 TD = getNestedDistributeDirective(CGM.getContext(), D);
9278 if (!TD)
9279 return;
9280 const auto *LD = cast<OMPLoopDirective>(TD);
9281 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9282 PrePostActionTy &) {
9283 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9284 llvm::Value *Args[] = {DeviceID, NumIterations};
9285 CGF.EmitRuntimeCall(
9286 OMPBuilder.getOrCreateRuntimeFunction(
9287 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9288 Args);
9289 }
9290 };
9291 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9292 }
9293
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Function * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)9294 void CGOpenMPRuntime::emitTargetCall(
9295 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9296 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9297 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9298 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9299 const OMPLoopDirective &D)>
9300 SizeEmitter) {
9301 if (!CGF.HaveInsertPoint())
9302 return;
9303
9304 assert(OutlinedFn && "Invalid outlined function!");
9305
9306 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9307 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9308 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9309 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9310 PrePostActionTy &) {
9311 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9312 };
9313 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9314
9315 CodeGenFunction::OMPTargetDataInfo InputInfo;
9316 llvm::Value *MapTypesArray = nullptr;
9317 // Fill up the pointer arrays and transfer execution to the device.
9318 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9319 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9320 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9321 if (Device.getInt() == OMPC_DEVICE_ancestor) {
9322 // Reverse offloading is not supported, so just execute on the host.
9323 if (RequiresOuterTask) {
9324 CapturedVars.clear();
9325 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9326 }
9327 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9328 return;
9329 }
9330
9331 // On top of the arrays that were filled up, the target offloading call
9332 // takes as arguments the device id as well as the host pointer. The host
9333 // pointer is used by the runtime library to identify the current target
9334 // region, so it only has to be unique and not necessarily point to
9335 // anything. It could be the pointer to the outlined function that
9336 // implements the target region, but we aren't using that so that the
9337 // compiler doesn't need to keep that, and could therefore inline the host
9338 // function if proven worthwhile during optimization.
9339
9340 // From this point on, we need to have an ID of the target region defined.
9341 assert(OutlinedFnID && "Invalid outlined function ID!");
9342
9343 // Emit device ID if any.
9344 llvm::Value *DeviceID;
9345 if (Device.getPointer()) {
9346 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9347 Device.getInt() == OMPC_DEVICE_device_num) &&
9348 "Expected device_num modifier.");
9349 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9350 DeviceID =
9351 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9352 } else {
9353 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9354 }
9355
9356 // Emit the number of elements in the offloading arrays.
9357 llvm::Value *PointerNum =
9358 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9359
9360 // Return value of the runtime offloading call.
9361 llvm::Value *Return;
9362
9363 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9364 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9365
9366 // Emit tripcount for the target loop-based directive.
9367 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9368
9369 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9370 // The target region is an outlined function launched by the runtime
9371 // via calls __tgt_target() or __tgt_target_teams().
9372 //
9373 // __tgt_target() launches a target region with one team and one thread,
9374 // executing a serial region. This master thread may in turn launch
9375 // more threads within its team upon encountering a parallel region,
9376 // however, no additional teams can be launched on the device.
9377 //
9378 // __tgt_target_teams() launches a target region with one or more teams,
9379 // each with one or more threads. This call is required for target
9380 // constructs such as:
9381 // 'target teams'
9382 // 'target' / 'teams'
9383 // 'target teams distribute parallel for'
9384 // 'target parallel'
9385 // and so on.
9386 //
9387 // Note that on the host and CPU targets, the runtime implementation of
9388 // these calls simply call the outlined function without forking threads.
9389 // The outlined functions themselves have runtime calls to
9390 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9391 // the compiler in emitTeamsCall() and emitParallelCall().
9392 //
9393 // In contrast, on the NVPTX target, the implementation of
9394 // __tgt_target_teams() launches a GPU kernel with the requested number
9395 // of teams and threads so no additional calls to the runtime are required.
9396 if (NumTeams) {
9397 // If we have NumTeams defined this means that we have an enclosed teams
9398 // region. Therefore we also expect to have NumThreads defined. These two
9399 // values should be defined in the presence of a teams directive,
9400 // regardless of having any clauses associated. If the user is using teams
9401 // but no clauses, these two values will be the default that should be
9402 // passed to the runtime library - a 32-bit integer with the value zero.
9403 assert(NumThreads && "Thread limit expression should be available along "
9404 "with number of teams.");
9405 llvm::Value *OffloadingArgs[] = {DeviceID,
9406 OutlinedFnID,
9407 PointerNum,
9408 InputInfo.BasePointersArray.getPointer(),
9409 InputInfo.PointersArray.getPointer(),
9410 InputInfo.SizesArray.getPointer(),
9411 MapTypesArray,
9412 NumTeams,
9413 NumThreads};
9414 Return = CGF.EmitRuntimeCall(
9415 OMPBuilder.getOrCreateRuntimeFunction(
9416 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
9417 : OMPRTL___tgt_target_teams),
9418 OffloadingArgs);
9419 } else {
9420 llvm::Value *OffloadingArgs[] = {DeviceID,
9421 OutlinedFnID,
9422 PointerNum,
9423 InputInfo.BasePointersArray.getPointer(),
9424 InputInfo.PointersArray.getPointer(),
9425 InputInfo.SizesArray.getPointer(),
9426 MapTypesArray};
9427 Return = CGF.EmitRuntimeCall(
9428 OMPBuilder.getOrCreateRuntimeFunction(
9429 CGM.getModule(),
9430 HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
9431 OffloadingArgs);
9432 }
9433
9434 // Check the error code and execute the host version if required.
9435 llvm::BasicBlock *OffloadFailedBlock =
9436 CGF.createBasicBlock("omp_offload.failed");
9437 llvm::BasicBlock *OffloadContBlock =
9438 CGF.createBasicBlock("omp_offload.cont");
9439 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9440 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9441
9442 CGF.EmitBlock(OffloadFailedBlock);
9443 if (RequiresOuterTask) {
9444 CapturedVars.clear();
9445 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9446 }
9447 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9448 CGF.EmitBranch(OffloadContBlock);
9449
9450 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9451 };
9452
9453 // Notify that the host version must be executed.
9454 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9455 RequiresOuterTask](CodeGenFunction &CGF,
9456 PrePostActionTy &) {
9457 if (RequiresOuterTask) {
9458 CapturedVars.clear();
9459 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9460 }
9461 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9462 };
9463
9464 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9465 &CapturedVars, RequiresOuterTask,
9466 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9467 // Fill up the arrays with all the captured variables.
9468 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9469 MappableExprsHandler::MapValuesArrayTy Pointers;
9470 MappableExprsHandler::MapValuesArrayTy Sizes;
9471 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9472
9473 // Get mappable expression information.
9474 MappableExprsHandler MEHandler(D, CGF);
9475 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9476
9477 auto RI = CS.getCapturedRecordDecl()->field_begin();
9478 auto CV = CapturedVars.begin();
9479 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9480 CE = CS.capture_end();
9481 CI != CE; ++CI, ++RI, ++CV) {
9482 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9483 MappableExprsHandler::MapValuesArrayTy CurPointers;
9484 MappableExprsHandler::MapValuesArrayTy CurSizes;
9485 MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9486 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9487
9488 // VLA sizes are passed to the outlined region by copy and do not have map
9489 // information associated.
9490 if (CI->capturesVariableArrayType()) {
9491 CurBasePointers.push_back(*CV);
9492 CurPointers.push_back(*CV);
9493 CurSizes.push_back(CGF.Builder.CreateIntCast(
9494 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9495 // Copy to the device as an argument. No need to retrieve it.
9496 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9497 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9498 MappableExprsHandler::OMP_MAP_IMPLICIT);
9499 } else {
9500 // If we have any information in the map clause, we use it, otherwise we
9501 // just do a default mapping.
9502 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9503 CurSizes, CurMapTypes, PartialStruct);
9504 if (CurBasePointers.empty())
9505 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9506 CurPointers, CurSizes, CurMapTypes);
9507 // Generate correct mapping for variables captured by reference in
9508 // lambdas.
9509 if (CI->capturesVariable())
9510 MEHandler.generateInfoForLambdaCaptures(
9511 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9512 CurMapTypes, LambdaPointers);
9513 }
9514 // We expect to have at least an element of information for this capture.
9515 assert(!CurBasePointers.empty() &&
9516 "Non-existing map pointer for capture!");
9517 assert(CurBasePointers.size() == CurPointers.size() &&
9518 CurBasePointers.size() == CurSizes.size() &&
9519 CurBasePointers.size() == CurMapTypes.size() &&
9520 "Inconsistent map information sizes!");
9521
9522 // If there is an entry in PartialStruct it means we have a struct with
9523 // individual members mapped. Emit an extra combined entry.
9524 if (PartialStruct.Base.isValid())
9525 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9526 CurMapTypes, PartialStruct);
9527
9528 // We need to append the results of this capture to what we already have.
9529 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9530 Pointers.append(CurPointers.begin(), CurPointers.end());
9531 Sizes.append(CurSizes.begin(), CurSizes.end());
9532 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9533 }
9534 // Adjust MEMBER_OF flags for the lambdas captures.
9535 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9536 Pointers, MapTypes);
9537 // Map other list items in the map clause which are not captured variables
9538 // but "declare target link" global variables.
9539 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9540 MapTypes);
9541
9542 TargetDataInfo Info;
9543 // Fill up the arrays and create the arguments.
9544 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9545 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9546 Info.PointersArray, Info.SizesArray,
9547 Info.MapTypesArray, Info);
9548 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9549 InputInfo.BasePointersArray =
9550 Address(Info.BasePointersArray, CGM.getPointerAlign());
9551 InputInfo.PointersArray =
9552 Address(Info.PointersArray, CGM.getPointerAlign());
9553 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9554 MapTypesArray = Info.MapTypesArray;
9555 if (RequiresOuterTask)
9556 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9557 else
9558 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9559 };
9560
9561 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9562 CodeGenFunction &CGF, PrePostActionTy &) {
9563 if (RequiresOuterTask) {
9564 CodeGenFunction::OMPTargetDataInfo InputInfo;
9565 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9566 } else {
9567 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9568 }
9569 };
9570
9571 // If we have a target function ID it means that we need to support
9572 // offloading, otherwise, just execute on the host. We need to execute on host
9573 // regardless of the conditional in the if clause if, e.g., the user do not
9574 // specify target triples.
9575 if (OutlinedFnID) {
9576 if (IfCond) {
9577 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9578 } else {
9579 RegionCodeGenTy ThenRCG(TargetThenGen);
9580 ThenRCG(CGF);
9581 }
9582 } else {
9583 RegionCodeGenTy ElseRCG(TargetElseGen);
9584 ElseRCG(CGF);
9585 }
9586 }
9587
/// Recursively walk \p S looking for OpenMP target directives and emit a
/// device entry point for each one found. \p ParentName is the mangled name
/// of the enclosing host function/ctor/dtor; it participates in the unique
/// offload-entry naming so the host and device tables match up.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (DeviceID, FileID, ParentName, Line) tuple uniquely identifies this
    // target region across translation units.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target-execution directives, so
    // reaching them here would mean isOpenMPTargetExecutionDirective() and
    // this switch disagree.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target directive: recurse only into its captured body.
    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9736
emitTargetFunctions(GlobalDecl GD)9737 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9738 // If emitting code for the host, we do not process FD here. Instead we do
9739 // the normal code generation.
9740 if (!CGM.getLangOpts().OpenMPIsDevice) {
9741 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9742 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9743 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9744 // Do not emit device_type(nohost) functions for the host.
9745 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9746 return true;
9747 }
9748 return false;
9749 }
9750
9751 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9752 // Try to detect target regions in the function.
9753 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9754 StringRef Name = CGM.getMangledName(GD);
9755 scanForTargetRegionsFunctions(FD->getBody(), Name);
9756 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9757 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9758 // Do not emit device_type(nohost) functions for the host.
9759 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9760 return true;
9761 }
9762
9763 // Do not to emit function if it is not marked as declare target.
9764 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9765 AlreadyEmittedTargetDecls.count(VD) == 0;
9766 }
9767
emitTargetGlobalVariable(GlobalDecl GD)9768 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9769 if (!CGM.getLangOpts().OpenMPIsDevice)
9770 return false;
9771
9772 // Check if there are Ctors/Dtors in this declaration and look for target
9773 // regions in it. We use the complete variant to produce the kernel name
9774 // mangling.
9775 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9776 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9777 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9778 StringRef ParentName =
9779 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9780 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9781 }
9782 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9783 StringRef ParentName =
9784 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9785 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9786 }
9787 }
9788
9789 // Do not to emit variable if it is not marked as declare target.
9790 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9791 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9792 cast<VarDecl>(GD.getDecl()));
9793 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9794 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9795 HasRequiresUnifiedSharedMemory)) {
9796 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9797 return true;
9798 }
9799 return false;
9800 }
9801
9802 llvm::Constant *
registerTargetFirstprivateCopy(CodeGenFunction & CGF,const VarDecl * VD)9803 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9804 const VarDecl *VD) {
9805 assert(VD->getType().isConstant(CGM.getContext()) &&
9806 "Expected constant variable.");
9807 StringRef VarName;
9808 llvm::Constant *Addr;
9809 llvm::GlobalValue::LinkageTypes Linkage;
9810 QualType Ty = VD->getType();
9811 SmallString<128> Buffer;
9812 {
9813 unsigned DeviceID;
9814 unsigned FileID;
9815 unsigned Line;
9816 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9817 FileID, Line);
9818 llvm::raw_svector_ostream OS(Buffer);
9819 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9820 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9821 VarName = OS.str();
9822 }
9823 Linkage = llvm::GlobalValue::InternalLinkage;
9824 Addr =
9825 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9826 getDefaultFirstprivateAddressSpace());
9827 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9828 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9829 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9830 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9831 VarName, Addr, VarSize,
9832 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9833 return Addr;
9834 }
9835
registerTargetGlobalVariable(const VarDecl * VD,llvm::Constant * Addr)9836 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9837 llvm::Constant *Addr) {
9838 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9839 !CGM.getLangOpts().OpenMPIsDevice)
9840 return;
9841 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9842 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9843 if (!Res) {
9844 if (CGM.getLangOpts().OpenMPIsDevice) {
9845 // Register non-target variables being emitted in device code (debug info
9846 // may cause this).
9847 StringRef VarName = CGM.getMangledName(VD);
9848 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9849 }
9850 return;
9851 }
9852 // Register declare target variables.
9853 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9854 StringRef VarName;
9855 CharUnits VarSize;
9856 llvm::GlobalValue::LinkageTypes Linkage;
9857
9858 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9859 !HasRequiresUnifiedSharedMemory) {
9860 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9861 VarName = CGM.getMangledName(VD);
9862 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9863 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9864 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9865 } else {
9866 VarSize = CharUnits::Zero();
9867 }
9868 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9869 // Temp solution to prevent optimizations of the internal variables.
9870 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9871 std::string RefName = getName({VarName, "ref"});
9872 if (!CGM.GetGlobalValue(RefName)) {
9873 llvm::Constant *AddrRef =
9874 getOrCreateInternalVariable(Addr->getType(), RefName);
9875 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9876 GVAddrRef->setConstant(/*Val=*/true);
9877 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9878 GVAddrRef->setInitializer(Addr);
9879 CGM.addCompilerUsedGlobal(GVAddrRef);
9880 }
9881 }
9882 } else {
9883 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9884 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9885 HasRequiresUnifiedSharedMemory)) &&
9886 "Declare target attribute must link or to with unified memory.");
9887 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9888 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9889 else
9890 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9891
9892 if (CGM.getLangOpts().OpenMPIsDevice) {
9893 VarName = Addr->getName();
9894 Addr = nullptr;
9895 } else {
9896 VarName = getAddrOfDeclareTargetVar(VD).getName();
9897 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9898 }
9899 VarSize = CGM.getPointerSize();
9900 Linkage = llvm::GlobalValue::WeakAnyLinkage;
9901 }
9902
9903 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9904 VarName, Addr, VarSize, Flags, Linkage);
9905 }
9906
emitTargetGlobal(GlobalDecl GD)9907 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9908 if (isa<FunctionDecl>(GD.getDecl()) ||
9909 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9910 return emitTargetFunctions(GD);
9911
9912 return emitTargetGlobalVariable(GD);
9913 }
9914
emitDeferredTargetDecls() const9915 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9916 for (const VarDecl *VD : DeferredGlobalVariables) {
9917 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9918 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9919 if (!Res)
9920 continue;
9921 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9922 !HasRequiresUnifiedSharedMemory) {
9923 CGM.EmitGlobal(VD);
9924 } else {
9925 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9926 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9927 HasRequiresUnifiedSharedMemory)) &&
9928 "Expected link clause or to clause with unified memory.");
9929 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9930 }
9931 }
9932 }
9933
/// Hook for target-specific runtime subclasses to adjust the data mappings
/// generated for lambdas. The generic runtime has nothing to adjust, so this
/// implementation only asserts that it is invoked on a target directive.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9939
processRequiresDirective(const OMPRequiresDecl * D)9940 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9941 for (const OMPClause *Clause : D->clauselists()) {
9942 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9943 HasRequiresUnifiedSharedMemory = true;
9944 } else if (const auto *AC =
9945 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9946 switch (AC->getAtomicDefaultMemOrderKind()) {
9947 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9948 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9949 break;
9950 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9951 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9952 break;
9953 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9954 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9955 break;
9956 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9957 break;
9958 }
9959 }
9960 }
9961 }
9962
/// Return the default atomic ordering requested via
/// 'omp requires atomic_default_mem_order' (set in processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9966
hasAllocateAttributeForGlobalVar(const VarDecl * VD,LangAS & AS)9967 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9968 LangAS &AS) {
9969 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9970 return false;
9971 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9972 switch(A->getAllocatorType()) {
9973 case OMPAllocateDeclAttr::OMPNullMemAlloc:
9974 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9975 // Not supported, fallback to the default mem space.
9976 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9977 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9978 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9979 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9980 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9981 case OMPAllocateDeclAttr::OMPConstMemAlloc:
9982 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9983 AS = LangAS::Default;
9984 return true;
9985 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9986 llvm_unreachable("Expected predefined allocator for the variables with the "
9987 "static storage.");
9988 }
9989 return false;
9990 }
9991
/// True when an 'omp requires unified_shared_memory' directive was seen
/// (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9995
/// RAII: during device compilation, temporarily stop implicitly marking
/// declarations as declare-target globals. The previous ShouldMarkAsGlobal
/// value is saved and restored by the destructor.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
10004
/// Restore the ShouldMarkAsGlobal flag saved by the constructor (device
/// compilation only — on the host the constructor changed nothing).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10009
markAsGlobalTarget(GlobalDecl GD)10010 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10011 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10012 return true;
10013
10014 const auto *D = cast<FunctionDecl>(GD.getDecl());
10015 // Do not to emit function if it is marked as declare target as it was already
10016 // emitted.
10017 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10018 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10019 if (auto *F = dyn_cast_or_null<llvm::Function>(
10020 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10021 return !F->isDeclaration();
10022 return false;
10023 }
10024 return true;
10025 }
10026
10027 return !AlreadyEmittedTargetDecls.insert(D).second;
10028 }
10029
emitRequiresDirectiveRegFun()10030 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10031 // If we don't have entries or if we are emitting code for the device, we
10032 // don't need to do anything.
10033 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10034 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10035 (OffloadEntriesInfoManager.empty() &&
10036 !HasEmittedDeclareTargetRegion &&
10037 !HasEmittedTargetRegion))
10038 return nullptr;
10039
10040 // Create and register the function that handles the requires directives.
10041 ASTContext &C = CGM.getContext();
10042
10043 llvm::Function *RequiresRegFn;
10044 {
10045 CodeGenFunction CGF(CGM);
10046 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10047 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10048 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10049 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10050 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10051 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10052 // TODO: check for other requires clauses.
10053 // The requires directive takes effect only when a target region is
10054 // present in the compilation unit. Otherwise it is ignored and not
10055 // passed to the runtime. This avoids the runtime from throwing an error
10056 // for mismatching requires clauses across compilation units that don't
10057 // contain at least 1 target region.
10058 assert((HasEmittedTargetRegion ||
10059 HasEmittedDeclareTargetRegion ||
10060 !OffloadEntriesInfoManager.empty()) &&
10061 "Target or declare target region expected.");
10062 if (HasRequiresUnifiedSharedMemory)
10063 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10064 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10065 CGM.getModule(), OMPRTL___tgt_register_requires),
10066 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10067 CGF.FinishFunction();
10068 }
10069 return RequiresRegFn;
10070 }
10071
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)10072 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10073 const OMPExecutableDirective &D,
10074 SourceLocation Loc,
10075 llvm::Function *OutlinedFn,
10076 ArrayRef<llvm::Value *> CapturedVars) {
10077 if (!CGF.HaveInsertPoint())
10078 return;
10079
10080 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10081 CodeGenFunction::RunCleanupsScope Scope(CGF);
10082
10083 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10084 llvm::Value *Args[] = {
10085 RTLoc,
10086 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10087 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10088 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10089 RealArgs.append(std::begin(Args), std::end(Args));
10090 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10091
10092 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10093 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10094 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10095 }
10096
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)10097 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10098 const Expr *NumTeams,
10099 const Expr *ThreadLimit,
10100 SourceLocation Loc) {
10101 if (!CGF.HaveInsertPoint())
10102 return;
10103
10104 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10105
10106 llvm::Value *NumTeamsVal =
10107 NumTeams
10108 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10109 CGF.CGM.Int32Ty, /* isSigned = */ true)
10110 : CGF.Builder.getInt32(0);
10111
10112 llvm::Value *ThreadLimitVal =
10113 ThreadLimit
10114 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10115 CGF.CGM.Int32Ty, /* isSigned = */ true)
10116 : CGF.Builder.getInt32(0);
10117
10118 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10119 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10120 ThreadLimitVal};
10121 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10122 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10123 PushNumTeamsArgs);
10124 }
10125
emitTargetDataCalls(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device,const RegionCodeGenTy & CodeGen,TargetDataInfo & Info)10126 void CGOpenMPRuntime::emitTargetDataCalls(
10127 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10128 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10129 if (!CGF.HaveInsertPoint())
10130 return;
10131
10132 // Action used to replace the default codegen action and turn privatization
10133 // off.
10134 PrePostActionTy NoPrivAction;
10135
10136 // Generate the code for the opening of the data environment. Capture all the
10137 // arguments of the runtime call by reference because they are used in the
10138 // closing of the region.
10139 auto &&BeginThenGen = [this, &D, Device, &Info,
10140 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10141 // Fill up the arrays with all the mapped variables.
10142 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10143 MappableExprsHandler::MapValuesArrayTy Pointers;
10144 MappableExprsHandler::MapValuesArrayTy Sizes;
10145 MappableExprsHandler::MapFlagsArrayTy MapTypes;
10146
10147 // Get map clause information.
10148 MappableExprsHandler MCHandler(D, CGF);
10149 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10150
10151 // Fill up the arrays and create the arguments.
10152 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10153
10154 llvm::Value *BasePointersArrayArg = nullptr;
10155 llvm::Value *PointersArrayArg = nullptr;
10156 llvm::Value *SizesArrayArg = nullptr;
10157 llvm::Value *MapTypesArrayArg = nullptr;
10158 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10159 SizesArrayArg, MapTypesArrayArg, Info);
10160
10161 // Emit device ID if any.
10162 llvm::Value *DeviceID = nullptr;
10163 if (Device) {
10164 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10165 CGF.Int64Ty, /*isSigned=*/true);
10166 } else {
10167 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10168 }
10169
10170 // Emit the number of elements in the offloading arrays.
10171 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10172
10173 llvm::Value *OffloadingArgs[] = {
10174 DeviceID, PointerNum, BasePointersArrayArg,
10175 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10176 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10177 CGM.getModule(), OMPRTL___tgt_target_data_begin),
10178 OffloadingArgs);
10179
10180 // If device pointer privatization is required, emit the body of the region
10181 // here. It will have to be duplicated: with and without privatization.
10182 if (!Info.CaptureDeviceAddrMap.empty())
10183 CodeGen(CGF);
10184 };
10185
10186 // Generate code for the closing of the data region.
10187 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10188 PrePostActionTy &) {
10189 assert(Info.isValid() && "Invalid data environment closing arguments.");
10190
10191 llvm::Value *BasePointersArrayArg = nullptr;
10192 llvm::Value *PointersArrayArg = nullptr;
10193 llvm::Value *SizesArrayArg = nullptr;
10194 llvm::Value *MapTypesArrayArg = nullptr;
10195 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10196 SizesArrayArg, MapTypesArrayArg, Info);
10197
10198 // Emit device ID if any.
10199 llvm::Value *DeviceID = nullptr;
10200 if (Device) {
10201 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10202 CGF.Int64Ty, /*isSigned=*/true);
10203 } else {
10204 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10205 }
10206
10207 // Emit the number of elements in the offloading arrays.
10208 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10209
10210 llvm::Value *OffloadingArgs[] = {
10211 DeviceID, PointerNum, BasePointersArrayArg,
10212 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10213 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10214 CGM.getModule(), OMPRTL___tgt_target_data_end),
10215 OffloadingArgs);
10216 };
10217
10218 // If we need device pointer privatization, we need to emit the body of the
10219 // region with no privatization in the 'else' branch of the conditional.
10220 // Otherwise, we don't have to do anything.
10221 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10222 PrePostActionTy &) {
10223 if (!Info.CaptureDeviceAddrMap.empty()) {
10224 CodeGen.setAction(NoPrivAction);
10225 CodeGen(CGF);
10226 }
10227 };
10228
10229 // We don't have to do anything to close the region if the if clause evaluates
10230 // to false.
10231 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10232
10233 if (IfCond) {
10234 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10235 } else {
10236 RegionCodeGenTy RCG(BeginThenGen);
10237 RCG(CGF);
10238 }
10239
10240 // If we don't require privatization of device pointers, we emit the body in
10241 // between the runtime calls. This avoids duplicating the body code.
10242 if (Info.CaptureDeviceAddrMap.empty()) {
10243 CodeGen.setAction(NoPrivAction);
10244 CodeGen(CGF);
10245 }
10246
10247 if (IfCond) {
10248 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10249 } else {
10250 RegionCodeGenTy RCG(EndThenGen);
10251 RCG(CGF);
10252 }
10253 }
10254
/// Emit the runtime call for a standalone target data directive
/// (target enter data / target exit data / target update).
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to do if the current insertion point has been terminated.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  // InputInfo and MapTypesArray are captured by reference: they are filled in
  // by TargetThenGen (below) before this lambda is executed.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise use the "undefined device" marker.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Argument list shared by all __tgt_target_data_* entry points.
    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    // All remaining kinds are ruled out by the assertion at the top of the
    // function; they are listed explicitly (rather than folded into
    // 'default') so that adding a new directive kind produces a compiler
    // warning about an unhandled enumerator here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the map clauses, publish them through
  // InputInfo/MapTypesArray, and then emit the runtime call either inline or
  // wrapped in a task when 'depend' clauses are present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause, guard the whole emission; the else branch does
  // nothing for these standalone directives.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10420
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// OpenMP classification of the parameter; Vector unless a clause says
  /// otherwise.
  ParamKindTy Kind = Vector;
  /// For Linear: the linear step (rescaled by the pointee size for pointer
  /// parameters). For LinearWithVarStride: the position of the parameter
  /// that carries the stride.
  llvm::APSInt StrideOrArg;
  /// Alignment from the 'aligned' clause, or the OpenMP default SIMD
  /// alignment when the clause gives no explicit value.
  llvm::APSInt Alignment;
};
} // namespace
10431
evaluateCDTSize(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10432 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10433 ArrayRef<ParamAttrTy> ParamAttrs) {
10434 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10435 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10436 // of that clause. The VLEN value must be power of 2.
10437 // In other case the notion of the function`s "characteristic data type" (CDT)
10438 // is used to compute the vector length.
10439 // CDT is defined in the following order:
10440 // a) For non-void function, the CDT is the return type.
10441 // b) If the function has any non-uniform, non-linear parameters, then the
10442 // CDT is the type of the first such parameter.
10443 // c) If the CDT determined by a) or b) above is struct, union, or class
10444 // type which is pass-by-value (except for the type that maps to the
10445 // built-in complex data type), the characteristic data type is int.
10446 // d) If none of the above three cases is applicable, the CDT is int.
10447 // The VLEN is then determined based on the CDT and the size of vector
10448 // register of that ISA for which current vector version is generated. The
10449 // VLEN is computed using the formula below:
10450 // VLEN = sizeof(vector_register) / sizeof(CDT),
10451 // where vector register size specified in section 3.2.1 Registers and the
10452 // Stack Frame of original AMD64 ABI document.
10453 QualType RetType = FD->getReturnType();
10454 if (RetType.isNull())
10455 return 0;
10456 ASTContext &C = FD->getASTContext();
10457 QualType CDT;
10458 if (!RetType.isNull() && !RetType->isVoidType()) {
10459 CDT = RetType;
10460 } else {
10461 unsigned Offset = 0;
10462 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10463 if (ParamAttrs[Offset].Kind == Vector)
10464 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10465 ++Offset;
10466 }
10467 if (CDT.isNull()) {
10468 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10469 if (ParamAttrs[I + Offset].Kind == Vector) {
10470 CDT = FD->getParamDecl(I)->getType();
10471 break;
10472 }
10473 }
10474 }
10475 }
10476 if (CDT.isNull())
10477 CDT = C.IntTy;
10478 CDT = CDT->getCanonicalTypeUnqualified();
10479 if (CDT->isRecordType() || CDT->isUnionType())
10480 CDT = C.IntTy;
10481 return C.getTypeSize(CDT);
10482 }
10483
10484 static void
emitX86DeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn,const llvm::APSInt & VLENVal,ArrayRef<ParamAttrTy> ParamAttrs,OMPDeclareSimdDeclAttr::BranchStateTy State)10485 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10486 const llvm::APSInt &VLENVal,
10487 ArrayRef<ParamAttrTy> ParamAttrs,
10488 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10489 struct ISADataTy {
10490 char ISA;
10491 unsigned VecRegSize;
10492 };
10493 ISADataTy ISAData[] = {
10494 {
10495 'b', 128
10496 }, // SSE
10497 {
10498 'c', 256
10499 }, // AVX
10500 {
10501 'd', 256
10502 }, // AVX2
10503 {
10504 'e', 512
10505 }, // AVX512
10506 };
10507 llvm::SmallVector<char, 2> Masked;
10508 switch (State) {
10509 case OMPDeclareSimdDeclAttr::BS_Undefined:
10510 Masked.push_back('N');
10511 Masked.push_back('M');
10512 break;
10513 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10514 Masked.push_back('N');
10515 break;
10516 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10517 Masked.push_back('M');
10518 break;
10519 }
10520 for (char Mask : Masked) {
10521 for (const ISADataTy &Data : ISAData) {
10522 SmallString<256> Buffer;
10523 llvm::raw_svector_ostream Out(Buffer);
10524 Out << "_ZGV" << Data.ISA << Mask;
10525 if (!VLENVal) {
10526 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10527 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10528 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10529 } else {
10530 Out << VLENVal;
10531 }
10532 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10533 switch (ParamAttr.Kind){
10534 case LinearWithVarStride:
10535 Out << 's' << ParamAttr.StrideOrArg;
10536 break;
10537 case Linear:
10538 Out << 'l';
10539 if (ParamAttr.StrideOrArg != 1)
10540 Out << ParamAttr.StrideOrArg;
10541 break;
10542 case Uniform:
10543 Out << 'u';
10544 break;
10545 case Vector:
10546 Out << 'v';
10547 break;
10548 }
10549 if (!!ParamAttr.Alignment)
10550 Out << 'a' << ParamAttr.Alignment;
10551 }
10552 Out << '_' << Fn->getName();
10553 Fn->addFnAttr(Out.str());
10554 }
10555 }
10556 }
10557
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10563
10564 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10565 ///
10566 /// TODO: Need to implement the behavior for reference marked with a
10567 /// var or no linear modifiers (1.b in the section). For this, we
10568 /// need to extend ParamKindTy to support the linear modifiers.
getAArch64MTV(QualType QT,ParamKindTy Kind)10569 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10570 QT = QT.getCanonicalType();
10571
10572 if (QT->isVoidType())
10573 return false;
10574
10575 if (Kind == ParamKindTy::Uniform)
10576 return false;
10577
10578 if (Kind == ParamKindTy::Linear)
10579 return false;
10580
10581 // TODO: Handle linear references with modifiers
10582
10583 if (Kind == ParamKindTy::LinearWithVarStride)
10584 return false;
10585
10586 return true;
10587 }
10588
10589 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
getAArch64PBV(QualType QT,ASTContext & C)10590 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10591 QT = QT.getCanonicalType();
10592 unsigned Size = C.getTypeSize(QT);
10593
10594 // Only scalars and complex within 16 bytes wide set PVB to true.
10595 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10596 return false;
10597
10598 if (QT->isFloatingType())
10599 return true;
10600
10601 if (QT->isIntegerType())
10602 return true;
10603
10604 if (QT->isPointerType())
10605 return true;
10606
10607 // TODO: Add support for complex types (section 3.1.2, item 2).
10608
10609 return false;
10610 }
10611
10612 /// Computes the lane size (LS) of a return type or of an input parameter,
10613 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10614 /// TODO: Add support for references, section 3.2.1, item 1.
getAArch64LS(QualType QT,ParamKindTy Kind,ASTContext & C)10615 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10616 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10617 QualType PTy = QT.getCanonicalType()->getPointeeType();
10618 if (getAArch64PBV(PTy, C))
10619 return C.getTypeSize(PTy);
10620 }
10621 if (getAArch64PBV(QT, C))
10622 return C.getTypeSize(QT);
10623
10624 return C.getTypeSize(C.getUIntPtrType());
10625 }
10626
10627 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10628 // signature of the scalar function, as defined in 3.2.2 of the
10629 // AAVFABI.
10630 static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl * FD,ArrayRef<ParamAttrTy> ParamAttrs)10631 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10632 QualType RetType = FD->getReturnType().getCanonicalType();
10633
10634 ASTContext &C = FD->getASTContext();
10635
10636 bool OutputBecomesInput = false;
10637
10638 llvm::SmallVector<unsigned, 8> Sizes;
10639 if (!RetType->isVoidType()) {
10640 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10641 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10642 OutputBecomesInput = true;
10643 }
10644 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10645 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10646 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10647 }
10648
10649 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10650 // The LS of a function parameter / return value can only be a power
10651 // of 2, starting from 8 bits, up to 128.
10652 assert(std::all_of(Sizes.begin(), Sizes.end(),
10653 [](unsigned Size) {
10654 return Size == 8 || Size == 16 || Size == 32 ||
10655 Size == 64 || Size == 128;
10656 }) &&
10657 "Invalid size");
10658
10659 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10660 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10661 OutputBecomesInput);
10662 }
10663
10664 /// Mangle the parameter part of the vector function name according to
10665 /// their OpenMP classification. The mangling function is defined in
10666 /// section 3.5 of the AAVFABI.
mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs)10667 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10668 SmallString<256> Buffer;
10669 llvm::raw_svector_ostream Out(Buffer);
10670 for (const auto &ParamAttr : ParamAttrs) {
10671 switch (ParamAttr.Kind) {
10672 case LinearWithVarStride:
10673 Out << "ls" << ParamAttr.StrideOrArg;
10674 break;
10675 case Linear:
10676 Out << 'l';
10677 // Don't print the step value if it is not present or if it is
10678 // equal to 1.
10679 if (ParamAttr.StrideOrArg != 1)
10680 Out << ParamAttr.StrideOrArg;
10681 break;
10682 case Uniform:
10683 Out << 'u';
10684 break;
10685 case Vector:
10686 Out << 'v';
10687 break;
10688 }
10689
10690 if (!!ParamAttr.Alignment)
10691 Out << 'a' << ParamAttr.Alignment;
10692 }
10693
10694 return std::string(Out.str());
10695 }
10696
10697 // Function used to add the attribute. The parameter `VLEN` is
10698 // templated to allow the use of "x" when targeting scalable functions
10699 // for SVE.
10700 template <typename T>
addAArch64VectorName(T VLEN,StringRef LMask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10701 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10702 char ISA, StringRef ParSeq,
10703 StringRef MangledName, bool OutputBecomesInput,
10704 llvm::Function *Fn) {
10705 SmallString<256> Buffer;
10706 llvm::raw_svector_ostream Out(Buffer);
10707 Out << Prefix << ISA << LMask << VLEN;
10708 if (OutputBecomesInput)
10709 Out << "v";
10710 Out << ParSeq << "_" << MangledName;
10711 Fn->addFnAttr(Out.str());
10712 }
10713
10714 // Helper function to generate the Advanced SIMD names depending on
10715 // the value of the NDS when simdlen is not present.
addAArch64AdvSIMDNDSNames(unsigned NDS,StringRef Mask,StringRef Prefix,char ISA,StringRef ParSeq,StringRef MangledName,bool OutputBecomesInput,llvm::Function * Fn)10716 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10717 StringRef Prefix, char ISA,
10718 StringRef ParSeq, StringRef MangledName,
10719 bool OutputBecomesInput,
10720 llvm::Function *Fn) {
10721 switch (NDS) {
10722 case 8:
10723 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10724 OutputBecomesInput, Fn);
10725 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10726 OutputBecomesInput, Fn);
10727 break;
10728 case 16:
10729 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10730 OutputBecomesInput, Fn);
10731 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10732 OutputBecomesInput, Fn);
10733 break;
10734 case 32:
10735 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10736 OutputBecomesInput, Fn);
10737 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10738 OutputBecomesInput, Fn);
10739 break;
10740 case 64:
10741 case 128:
10742 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10743 OutputBecomesInput, Fn);
10744 break;
10745 default:
10746 llvm_unreachable("Scalar type is too wide.");
10747 }
10748 }
10749
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p ISA is 's' for SVE or 'n' for Advanced SIMD (asserted below);
/// \p UserVLEN is the simdlen value, or 0 when no simdlen clause was given.
/// NOTE(review): \p VecRegSize is currently not referenced in this function.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No clause: emit both the unmasked ("N") and masked ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable length, mangled as "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The lane counts are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10858
emitDeclareSimdFunction(const FunctionDecl * FD,llvm::Function * Fn)10859 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10860 llvm::Function *Fn) {
10861 ASTContext &C = CGM.getContext();
10862 FD = FD->getMostRecentDecl();
10863 // Map params to their positions in function decl.
10864 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10865 if (isa<CXXMethodDecl>(FD))
10866 ParamPositions.try_emplace(FD, 0);
10867 unsigned ParamPos = ParamPositions.size();
10868 for (const ParmVarDecl *P : FD->parameters()) {
10869 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10870 ++ParamPos;
10871 }
10872 while (FD) {
10873 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10874 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10875 // Mark uniform parameters.
10876 for (const Expr *E : Attr->uniforms()) {
10877 E = E->IgnoreParenImpCasts();
10878 unsigned Pos;
10879 if (isa<CXXThisExpr>(E)) {
10880 Pos = ParamPositions[FD];
10881 } else {
10882 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10883 ->getCanonicalDecl();
10884 Pos = ParamPositions[PVD];
10885 }
10886 ParamAttrs[Pos].Kind = Uniform;
10887 }
10888 // Get alignment info.
10889 auto NI = Attr->alignments_begin();
10890 for (const Expr *E : Attr->aligneds()) {
10891 E = E->IgnoreParenImpCasts();
10892 unsigned Pos;
10893 QualType ParmTy;
10894 if (isa<CXXThisExpr>(E)) {
10895 Pos = ParamPositions[FD];
10896 ParmTy = E->getType();
10897 } else {
10898 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10899 ->getCanonicalDecl();
10900 Pos = ParamPositions[PVD];
10901 ParmTy = PVD->getType();
10902 }
10903 ParamAttrs[Pos].Alignment =
10904 (*NI)
10905 ? (*NI)->EvaluateKnownConstInt(C)
10906 : llvm::APSInt::getUnsigned(
10907 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10908 .getQuantity());
10909 ++NI;
10910 }
10911 // Mark linear parameters.
10912 auto SI = Attr->steps_begin();
10913 auto MI = Attr->modifiers_begin();
10914 for (const Expr *E : Attr->linears()) {
10915 E = E->IgnoreParenImpCasts();
10916 unsigned Pos;
10917 // Rescaling factor needed to compute the linear parameter
10918 // value in the mangled name.
10919 unsigned PtrRescalingFactor = 1;
10920 if (isa<CXXThisExpr>(E)) {
10921 Pos = ParamPositions[FD];
10922 } else {
10923 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10924 ->getCanonicalDecl();
10925 Pos = ParamPositions[PVD];
10926 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10927 PtrRescalingFactor = CGM.getContext()
10928 .getTypeSizeInChars(P->getPointeeType())
10929 .getQuantity();
10930 }
10931 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10932 ParamAttr.Kind = Linear;
10933 // Assuming a stride of 1, for `linear` without modifiers.
10934 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10935 if (*SI) {
10936 Expr::EvalResult Result;
10937 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10938 if (const auto *DRE =
10939 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10940 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10941 ParamAttr.Kind = LinearWithVarStride;
10942 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10943 ParamPositions[StridePVD->getCanonicalDecl()]);
10944 }
10945 }
10946 } else {
10947 ParamAttr.StrideOrArg = Result.Val.getInt();
10948 }
10949 }
10950 // If we are using a linear clause on a pointer, we need to
10951 // rescale the value of linear_step with the byte size of the
10952 // pointee type.
10953 if (Linear == ParamAttr.Kind)
10954 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10955 ++SI;
10956 ++MI;
10957 }
10958 llvm::APSInt VLENVal;
10959 SourceLocation ExprLoc;
10960 const Expr *VLENExpr = Attr->getSimdlen();
10961 if (VLENExpr) {
10962 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10963 ExprLoc = VLENExpr->getExprLoc();
10964 }
10965 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10966 if (CGM.getTriple().isX86()) {
10967 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10968 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10969 unsigned VLEN = VLENVal.getExtValue();
10970 StringRef MangledName = Fn->getName();
10971 if (CGM.getTarget().hasFeature("sve"))
10972 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10973 MangledName, 's', 128, Fn, ExprLoc);
10974 if (CGM.getTarget().hasFeature("neon"))
10975 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10976 MangledName, 'n', 128, Fn, ExprLoc);
10977 }
10978 }
10979 FD = FD->getPreviousDecl();
10980 }
10981 }
10982
namespace {
/// Cleanup action for doacross support.
/// Emits the doacross finalization runtime call when the enclosing cleanup
/// scope is popped.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments forwarded to the finalization runtime call.
  static const int DoacrossFinArgs = 2;

private:
  /// The runtime function to call on cleanup.
  llvm::FunctionCallee RTLFn;
  /// Arguments for RTLFn, copied at construction time because the cleanup
  /// may be emitted long after the caller's locals have gone away.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the insertion point is already terminated.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11007
// Emits initialization for a doacross loop nest: builds a stack array of
// kmp_dim descriptors (one per dimension in NumIterations), calls
// __kmpc_doacross_init, and pushes a cleanup so __kmpc_doacross_fini is
// emitted on every exit path from the region.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize dims[]; 'lo' therefore stays 0 for every dimension and
  // only 'up' and 'st' are filled in explicitly below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching finalization call as a normal-and-EH cleanup so it
  // runs no matter how the region is left.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11078
// Emits a doacross 'ordered' construct with depend(source)/depend(sink):
// materializes the per-loop dependence vector as a kmp_int64 array and calls
// __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Convert each loop's dependence expression to kmp_int64 and store it.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Runtime call arguments: ident_t *loc, kmp_int32 gtid, kmp_int64 *vec.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
11111
emitCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::FunctionCallee Callee,ArrayRef<llvm::Value * > Args) const11112 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11113 llvm::FunctionCallee Callee,
11114 ArrayRef<llvm::Value *> Args) const {
11115 assert(Loc.isValid() && "Outlined function call location must be valid.");
11116 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11117
11118 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11119 if (Fn->doesNotThrow()) {
11120 CGF.EmitNounwindRuntimeCall(Fn, Args);
11121 return;
11122 }
11123 }
11124 CGF.EmitRuntimeCall(Callee, Args);
11125 }
11126
// Emits a call to an outlined OpenMP region function. The default
// implementation simply forwards to emitCall; device runtimes may override.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11132
emitFunctionProlog(CodeGenFunction & CGF,const Decl * D)11133 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11134 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11135 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11136 HasEmittedDeclareTargetRegion = true;
11137 }
11138
// Default mapping from a target-region parameter to an address: no
// translation is performed here, the native parameter's local address is
// returned directly (TargetParam is intentionally unused in the base class).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11144
11145 namespace {
11146 /// Cleanup action for allocate support.
11147 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11148 public:
11149 static const int CleanupArgs = 3;
11150
11151 private:
11152 llvm::FunctionCallee RTLFn;
11153 llvm::Value *Args[CleanupArgs];
11154
11155 public:
OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,ArrayRef<llvm::Value * > CallArgs)11156 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11157 ArrayRef<llvm::Value *> CallArgs)
11158 : RTLFn(RTLFn) {
11159 assert(CallArgs.size() == CleanupArgs &&
11160 "Size of arguments does not match.");
11161 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11162 }
Emit(CodeGenFunction & CGF,Flags)11163 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11164 if (!CGF.HaveInsertPoint())
11165 return;
11166 CGF.EmitRuntimeCall(RTLFn, Args);
11167 }
11168 };
11169 } // namespace
11170
// For a local variable annotated with an OpenMP 'allocate' attribute,
// allocates its storage via __kmpc_alloc with the requested allocator,
// pushes a cleanup that calls __kmpc_free on scope exit, and returns the
// typed address. Returns Address::invalid() when the normal (default)
// allocation should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is a runtime value; round it up to the alignment.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Statically-sized type: compute the aligned size at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Ensure __kmpc_free(gtid, addr, allocator) runs on every exit path.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw pointer to the variable's actual type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11227
// RAII constructor: if the loop directive S has 'nontemporal' clauses,
// pushes a set of the referenced declarations onto the runtime's
// NontemporalDeclsStack; the destructor pops it again.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Anything that is not a plain DeclRefExpr must be a member of the
        // current class (implicit or explicit 'this' base).
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11253
~NontemporalDeclsRAII()11254 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11255 if (!NeedToPush)
11256 return;
11257 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11258 }
11259
isNontemporalDecl(const ValueDecl * VD) const11260 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11261 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11262
11263 return llvm::any_of(
11264 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11265 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11266 }
11267
tryToDisableInnerAnalysis(const OMPExecutableDirective & S,llvm::DenseSet<CanonicalDeclPtr<const Decl>> & NeedToAddForLPCsAsDisabled) const11268 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11269 const OMPExecutableDirective &S,
11270 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11271 const {
11272 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11273 // Vars in target/task regions must be excluded completely.
11274 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11275 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11276 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11277 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11278 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11279 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11280 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11281 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11282 }
11283 }
11284 // Exclude vars in private clauses.
11285 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11286 for (const Expr *Ref : C->varlists()) {
11287 if (!Ref->getType()->isScalarType())
11288 continue;
11289 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11290 if (!DRE)
11291 continue;
11292 NeedToCheckForLPCs.insert(DRE->getDecl());
11293 }
11294 }
11295 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11296 for (const Expr *Ref : C->varlists()) {
11297 if (!Ref->getType()->isScalarType())
11298 continue;
11299 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11300 if (!DRE)
11301 continue;
11302 NeedToCheckForLPCs.insert(DRE->getDecl());
11303 }
11304 }
11305 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11306 for (const Expr *Ref : C->varlists()) {
11307 if (!Ref->getType()->isScalarType())
11308 continue;
11309 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11310 if (!DRE)
11311 continue;
11312 NeedToCheckForLPCs.insert(DRE->getDecl());
11313 }
11314 }
11315 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11316 for (const Expr *Ref : C->varlists()) {
11317 if (!Ref->getType()->isScalarType())
11318 continue;
11319 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11320 if (!DRE)
11321 continue;
11322 NeedToCheckForLPCs.insert(DRE->getDecl());
11323 }
11324 }
11325 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11326 for (const Expr *Ref : C->varlists()) {
11327 if (!Ref->getType()->isScalarType())
11328 continue;
11329 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11330 if (!DRE)
11331 continue;
11332 NeedToCheckForLPCs.insert(DRE->getDecl());
11333 }
11334 }
11335 for (const Decl *VD : NeedToCheckForLPCs) {
11336 for (const LastprivateConditionalData &Data :
11337 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11338 if (Data.DeclToUniqueName.count(VD) > 0) {
11339 if (!Data.Disabled)
11340 NeedToAddForLPCsAsDisabled.insert(VD);
11341 break;
11342 }
11343 }
11344 }
11345 }
11346
// RAII constructor (push variant): for OpenMP >= 5.0 directives carrying at
// least one lastprivate(conditional) clause, pushes a
// LastprivateConditionalData entry recording the tracked declarations (each
// mapped to a unique "pl_cond" name), the loop iteration variable lvalue,
// and the emitting function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to the unique global name later
    // used for its "last value"/"last iteration" helper variables.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11378
// RAII constructor (disable variant): for nested regions where the
// lastprivate-conditional tracking of enclosing directives must be
// suppressed. If tryToDisableInnerAnalysis finds any affected declarations,
// pushes a "Disabled" stack entry listing them (with empty unique names).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11397
// Named factory for the "disable" constructor above: returns an RAII object
// that temporarily disables lastprivate-conditional analysis inside S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11403
~LastprivateConditionalRAII()11404 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11405 if (CGM.getLangOpts().OpenMP < 50)
11406 return;
11407 if (Action == ActionToDo::DisableLastprivateConditional) {
11408 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11409 "Expected list of disabled private vars.");
11410 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11411 }
11412 if (Action == ActionToDo::PushAsLastprivateConditional) {
11413 assert(
11414 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11415 "Expected list of lastprivate conditional vars.");
11416 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11417 }
11418 }
11419
// Creates (or reuses, per function) the helper record
//   struct { <VD's type> Value; char Fired; }
// used to track whether a lastprivate(conditional) variable was assigned,
// zero-initializes the Fired flag, and returns the address of the Value
// field to serve as VD's private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the record type and a stack
    // temporary for it, then cache the descriptor.
    // NOTE(review): "lasprivate.conditional" looks like a typo for
    // "lastprivate.conditional", but the name ends up in IR type names, so
    // renaming would churn tests — confirm before fixing.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already created for this function: unpack the cached descriptor.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0: the variable has not been assigned in this region yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11454
11455 namespace {
11456 /// Checks if the lastprivate conditional variable is referenced in LHS.
11457 class LastprivateConditionalRefChecker final
11458 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11459 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11460 const Expr *FoundE = nullptr;
11461 const Decl *FoundD = nullptr;
11462 StringRef UniqueDeclName;
11463 LValue IVLVal;
11464 llvm::Function *FoundFn = nullptr;
11465 SourceLocation Loc;
11466
11467 public:
VisitDeclRefExpr(const DeclRefExpr * E)11468 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11469 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11470 llvm::reverse(LPM)) {
11471 auto It = D.DeclToUniqueName.find(E->getDecl());
11472 if (It == D.DeclToUniqueName.end())
11473 continue;
11474 if (D.Disabled)
11475 return false;
11476 FoundE = E;
11477 FoundD = E->getDecl()->getCanonicalDecl();
11478 UniqueDeclName = It->second;
11479 IVLVal = D.IVLVal;
11480 FoundFn = D.Fn;
11481 break;
11482 }
11483 return FoundE == E;
11484 }
VisitMemberExpr(const MemberExpr * E)11485 bool VisitMemberExpr(const MemberExpr *E) {
11486 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11487 return false;
11488 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11489 llvm::reverse(LPM)) {
11490 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11491 if (It == D.DeclToUniqueName.end())
11492 continue;
11493 if (D.Disabled)
11494 return false;
11495 FoundE = E;
11496 FoundD = E->getMemberDecl()->getCanonicalDecl();
11497 UniqueDeclName = It->second;
11498 IVLVal = D.IVLVal;
11499 FoundFn = D.Fn;
11500 break;
11501 }
11502 return FoundE == E;
11503 }
VisitStmt(const Stmt * S)11504 bool VisitStmt(const Stmt *S) {
11505 for (const Stmt *Child : S->children()) {
11506 if (!Child)
11507 continue;
11508 if (const auto *E = dyn_cast<Expr>(Child))
11509 if (!E->isGLValue())
11510 continue;
11511 if (Visit(Child))
11512 return true;
11513 }
11514 return false;
11515 }
LastprivateConditionalRefChecker(ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)11516 explicit LastprivateConditionalRefChecker(
11517 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11518 : LPM(LPM) {}
11519 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
getFoundData() const11520 getFoundData() const {
11521 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11522 }
11523 };
11524 } // namespace
11525
// Emits the "if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }" update
// of the global last-iteration/last-value variables for a
// lastprivate(conditional) variable, guarded by a critical region named
// after UniqueDeclName (no critical region in SIMD-only mode, where no
// parallel region can be emitted).
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11612
checkAndEmitLastprivateConditional(CodeGenFunction & CGF,const Expr * LHS)11613 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11614 const Expr *LHS) {
11615 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11616 return;
11617 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11618 if (!Checker.Visit(LHS))
11619 return;
11620 const Expr *FoundE;
11621 const Decl *FoundD;
11622 StringRef UniqueDeclName;
11623 LValue IVLVal;
11624 llvm::Function *FoundFn;
11625 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11626 Checker.getFoundData();
11627 if (FoundFn != CGF.CurFn) {
11628 // Special codegen for inner parallel regions.
11629 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11630 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11631 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11632 "Lastprivate conditional is not found in outer region.");
11633 QualType StructTy = std::get<0>(It->getSecond());
11634 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11635 LValue PrivLVal = CGF.EmitLValue(FoundE);
11636 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11637 PrivLVal.getAddress(CGF),
11638 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11639 LValue BaseLVal =
11640 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11641 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11642 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11643 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11644 FiredLVal, llvm::AtomicOrdering::Unordered,
11645 /*IsVolatile=*/true, /*isInit=*/false);
11646 return;
11647 }
11648
11649 // Private address of the lastprivate conditional in the current context.
11650 // priv_a
11651 LValue LVal = CGF.EmitLValue(FoundE);
11652 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11653 FoundE->getExprLoc());
11654 }
11655
checkAndEmitSharedLastprivateConditional(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> & IgnoredDecls)11656 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11657 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11658 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11659 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11660 return;
11661 auto Range = llvm::reverse(LastprivateConditionalStack);
11662 auto It = llvm::find_if(
11663 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11664 if (It == Range.end() || It->Fn != CGF.CurFn)
11665 return;
11666 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11667 assert(LPCI != LastprivateConditionalToTypes.end() &&
11668 "Lastprivates must be registered already.");
11669 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11670 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11671 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11672 for (const auto &Pair : It->DeclToUniqueName) {
11673 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11674 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11675 continue;
11676 auto I = LPCI->getSecond().find(Pair.first);
11677 assert(I != LPCI->getSecond().end() &&
11678 "Lastprivate must be rehistered already.");
11679 // bool Cmp = priv_a.Fired != 0;
11680 LValue BaseLVal = std::get<3>(I->getSecond());
11681 LValue FiredLVal =
11682 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11683 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11684 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11685 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11686 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11687 // if (Cmp) {
11688 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11689 CGF.EmitBlock(ThenBB);
11690 Address Addr = CGF.GetAddrOfLocalVar(VD);
11691 LValue LVal;
11692 if (VD->getType()->isReferenceType())
11693 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11694 AlignmentSource::Decl);
11695 else
11696 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11697 AlignmentSource::Decl);
11698 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11699 D.getBeginLoc());
11700 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11701 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11702 // }
11703 }
11704 }
11705
// Final copy-out for a lastprivate(conditional) variable: if the global
// "last value" variable exists (i.e. the variable was actually updated
// somewhere in the region), loads it and stores it into the private copy.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11724
// SIMD-only runtime stub: parallel outlining is not supported in this mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11730
// SIMD-only runtime stub: teams outlining is not supported in this mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11736
// SIMD-only runtime stub: task outlining is not supported in this mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11744
emitParallelCall(CodeGenFunction & CGF,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars,const Expr * IfCond)11745 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11746 SourceLocation Loc,
11747 llvm::Function *OutlinedFn,
11748 ArrayRef<llvm::Value *> CapturedVars,
11749 const Expr *IfCond) {
11750 llvm_unreachable("Not supported in SIMD-only mode");
11751 }
11752
emitCriticalRegion(CodeGenFunction & CGF,StringRef CriticalName,const RegionCodeGenTy & CriticalOpGen,SourceLocation Loc,const Expr * Hint)11753 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11754 CodeGenFunction &CGF, StringRef CriticalName,
11755 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11756 const Expr *Hint) {
11757 llvm_unreachable("Not supported in SIMD-only mode");
11758 }
11759
emitMasterRegion(CodeGenFunction & CGF,const RegionCodeGenTy & MasterOpGen,SourceLocation Loc)11760 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11761 const RegionCodeGenTy &MasterOpGen,
11762 SourceLocation Loc) {
11763 llvm_unreachable("Not supported in SIMD-only mode");
11764 }
11765
emitTaskyieldCall(CodeGenFunction & CGF,SourceLocation Loc)11766 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11767 SourceLocation Loc) {
11768 llvm_unreachable("Not supported in SIMD-only mode");
11769 }
11770
emitTaskgroupRegion(CodeGenFunction & CGF,const RegionCodeGenTy & TaskgroupOpGen,SourceLocation Loc)11771 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11772 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11773 SourceLocation Loc) {
11774 llvm_unreachable("Not supported in SIMD-only mode");
11775 }
11776
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps)11777 void CGOpenMPSIMDRuntime::emitSingleRegion(
11778 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11779 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11780 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11781 ArrayRef<const Expr *> AssignmentOps) {
11782 llvm_unreachable("Not supported in SIMD-only mode");
11783 }
11784
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)11785 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11786 const RegionCodeGenTy &OrderedOpGen,
11787 SourceLocation Loc,
11788 bool IsThreads) {
11789 llvm_unreachable("Not supported in SIMD-only mode");
11790 }
11791
emitBarrierCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind Kind,bool EmitChecks,bool ForceSimpleCall)11792 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11793 SourceLocation Loc,
11794 OpenMPDirectiveKind Kind,
11795 bool EmitChecks,
11796 bool ForceSimpleCall) {
11797 llvm_unreachable("Not supported in SIMD-only mode");
11798 }
11799
emitForDispatchInit(CodeGenFunction & CGF,SourceLocation Loc,const OpenMPScheduleTy & ScheduleKind,unsigned IVSize,bool IVSigned,bool Ordered,const DispatchRTInput & DispatchValues)11800 void CGOpenMPSIMDRuntime::emitForDispatchInit(
11801 CodeGenFunction &CGF, SourceLocation Loc,
11802 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11803 bool Ordered, const DispatchRTInput &DispatchValues) {
11804 llvm_unreachable("Not supported in SIMD-only mode");
11805 }
11806
emitForStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind,const OpenMPScheduleTy & ScheduleKind,const StaticRTInput & Values)11807 void CGOpenMPSIMDRuntime::emitForStaticInit(
11808 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11809 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11810 llvm_unreachable("Not supported in SIMD-only mode");
11811 }
11812
emitDistributeStaticInit(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDistScheduleClauseKind SchedKind,const StaticRTInput & Values)11813 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11814 CodeGenFunction &CGF, SourceLocation Loc,
11815 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11816 llvm_unreachable("Not supported in SIMD-only mode");
11817 }
11818
emitForOrderedIterationEnd(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned)11819 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11820 SourceLocation Loc,
11821 unsigned IVSize,
11822 bool IVSigned) {
11823 llvm_unreachable("Not supported in SIMD-only mode");
11824 }
11825
emitForStaticFinish(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind DKind)11826 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
11827 SourceLocation Loc,
11828 OpenMPDirectiveKind DKind) {
11829 llvm_unreachable("Not supported in SIMD-only mode");
11830 }
11831
emitForNext(CodeGenFunction & CGF,SourceLocation Loc,unsigned IVSize,bool IVSigned,Address IL,Address LB,Address UB,Address ST)11832 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
11833 SourceLocation Loc,
11834 unsigned IVSize, bool IVSigned,
11835 Address IL, Address LB,
11836 Address UB, Address ST) {
11837 llvm_unreachable("Not supported in SIMD-only mode");
11838 }
11839
emitNumThreadsClause(CodeGenFunction & CGF,llvm::Value * NumThreads,SourceLocation Loc)11840 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11841 llvm::Value *NumThreads,
11842 SourceLocation Loc) {
11843 llvm_unreachable("Not supported in SIMD-only mode");
11844 }
11845
emitProcBindClause(CodeGenFunction & CGF,ProcBindKind ProcBind,SourceLocation Loc)11846 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
11847 ProcBindKind ProcBind,
11848 SourceLocation Loc) {
11849 llvm_unreachable("Not supported in SIMD-only mode");
11850 }
11851
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)11852 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
11853 const VarDecl *VD,
11854 Address VDAddr,
11855 SourceLocation Loc) {
11856 llvm_unreachable("Not supported in SIMD-only mode");
11857 }
11858
emitThreadPrivateVarDefinition(const VarDecl * VD,Address VDAddr,SourceLocation Loc,bool PerformInit,CodeGenFunction * CGF)11859 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
11860 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11861 CodeGenFunction *CGF) {
11862 llvm_unreachable("Not supported in SIMD-only mode");
11863 }
11864
getAddrOfArtificialThreadPrivate(CodeGenFunction & CGF,QualType VarType,StringRef Name)11865 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
11866 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11867 llvm_unreachable("Not supported in SIMD-only mode");
11868 }
11869
emitFlush(CodeGenFunction & CGF,ArrayRef<const Expr * > Vars,SourceLocation Loc,llvm::AtomicOrdering AO)11870 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
11871 ArrayRef<const Expr *> Vars,
11872 SourceLocation Loc,
11873 llvm::AtomicOrdering AO) {
11874 llvm_unreachable("Not supported in SIMD-only mode");
11875 }
11876
emitTaskCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPExecutableDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)11877 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
11878 const OMPExecutableDirective &D,
11879 llvm::Function *TaskFunction,
11880 QualType SharedsTy, Address Shareds,
11881 const Expr *IfCond,
11882 const OMPTaskDataTy &Data) {
11883 llvm_unreachable("Not supported in SIMD-only mode");
11884 }
11885
emitTaskLoopCall(CodeGenFunction & CGF,SourceLocation Loc,const OMPLoopDirective & D,llvm::Function * TaskFunction,QualType SharedsTy,Address Shareds,const Expr * IfCond,const OMPTaskDataTy & Data)11886 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
11887 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11888 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11889 const Expr *IfCond, const OMPTaskDataTy &Data) {
11890 llvm_unreachable("Not supported in SIMD-only mode");
11891 }
11892
emitReduction(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > Privates,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,ArrayRef<const Expr * > ReductionOps,ReductionOptionsTy Options)11893 void CGOpenMPSIMDRuntime::emitReduction(
11894 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
11895 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
11896 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11897 assert(Options.SimpleReduction && "Only simple reduction is expected.");
11898 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11899 ReductionOps, Options);
11900 }
11901
emitTaskReductionInit(CodeGenFunction & CGF,SourceLocation Loc,ArrayRef<const Expr * > LHSExprs,ArrayRef<const Expr * > RHSExprs,const OMPTaskDataTy & Data)11902 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
11903 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
11904 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11905 llvm_unreachable("Not supported in SIMD-only mode");
11906 }
11907
emitTaskReductionFini(CodeGenFunction & CGF,SourceLocation Loc,bool IsWorksharingReduction)11908 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
11909 SourceLocation Loc,
11910 bool IsWorksharingReduction) {
11911 llvm_unreachable("Not supported in SIMD-only mode");
11912 }
11913
emitTaskReductionFixups(CodeGenFunction & CGF,SourceLocation Loc,ReductionCodeGen & RCG,unsigned N)11914 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
11915 SourceLocation Loc,
11916 ReductionCodeGen &RCG,
11917 unsigned N) {
11918 llvm_unreachable("Not supported in SIMD-only mode");
11919 }
11920
getTaskReductionItem(CodeGenFunction & CGF,SourceLocation Loc,llvm::Value * ReductionsPtr,LValue SharedLVal)11921 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
11922 SourceLocation Loc,
11923 llvm::Value *ReductionsPtr,
11924 LValue SharedLVal) {
11925 llvm_unreachable("Not supported in SIMD-only mode");
11926 }
11927
emitTaskwaitCall(CodeGenFunction & CGF,SourceLocation Loc)11928 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
11929 SourceLocation Loc) {
11930 llvm_unreachable("Not supported in SIMD-only mode");
11931 }
11932
emitCancellationPointCall(CodeGenFunction & CGF,SourceLocation Loc,OpenMPDirectiveKind CancelRegion)11933 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
11934 CodeGenFunction &CGF, SourceLocation Loc,
11935 OpenMPDirectiveKind CancelRegion) {
11936 llvm_unreachable("Not supported in SIMD-only mode");
11937 }
11938
emitCancelCall(CodeGenFunction & CGF,SourceLocation Loc,const Expr * IfCond,OpenMPDirectiveKind CancelRegion)11939 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
11940 SourceLocation Loc, const Expr *IfCond,
11941 OpenMPDirectiveKind CancelRegion) {
11942 llvm_unreachable("Not supported in SIMD-only mode");
11943 }
11944
emitTargetOutlinedFunction(const OMPExecutableDirective & D,StringRef ParentName,llvm::Function * & OutlinedFn,llvm::Constant * & OutlinedFnID,bool IsOffloadEntry,const RegionCodeGenTy & CodeGen)11945 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
11946 const OMPExecutableDirective &D, StringRef ParentName,
11947 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11948 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11949 llvm_unreachable("Not supported in SIMD-only mode");
11950 }
11951
emitTargetCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,llvm::Function * OutlinedFn,llvm::Value * OutlinedFnID,const Expr * IfCond,llvm::PointerIntPair<const Expr *,2,OpenMPDeviceClauseModifier> Device,llvm::function_ref<llvm::Value * (CodeGenFunction & CGF,const OMPLoopDirective & D)> SizeEmitter)11952 void CGOpenMPSIMDRuntime::emitTargetCall(
11953 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11954 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11955 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11956 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11957 const OMPLoopDirective &D)>
11958 SizeEmitter) {
11959 llvm_unreachable("Not supported in SIMD-only mode");
11960 }
11961
emitTargetFunctions(GlobalDecl GD)11962 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
11963 llvm_unreachable("Not supported in SIMD-only mode");
11964 }
11965
emitTargetGlobalVariable(GlobalDecl GD)11966 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11967 llvm_unreachable("Not supported in SIMD-only mode");
11968 }
11969
emitTargetGlobal(GlobalDecl GD)11970 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
11971 return false;
11972 }
11973
emitTeamsCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,SourceLocation Loc,llvm::Function * OutlinedFn,ArrayRef<llvm::Value * > CapturedVars)11974 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
11975 const OMPExecutableDirective &D,
11976 SourceLocation Loc,
11977 llvm::Function *OutlinedFn,
11978 ArrayRef<llvm::Value *> CapturedVars) {
11979 llvm_unreachable("Not supported in SIMD-only mode");
11980 }
11981
emitNumTeamsClause(CodeGenFunction & CGF,const Expr * NumTeams,const Expr * ThreadLimit,SourceLocation Loc)11982 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11983 const Expr *NumTeams,
11984 const Expr *ThreadLimit,
11985 SourceLocation Loc) {
11986 llvm_unreachable("Not supported in SIMD-only mode");
11987 }
11988
emitTargetDataCalls(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device,const RegionCodeGenTy & CodeGen,TargetDataInfo & Info)11989 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11990 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11991 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11992 llvm_unreachable("Not supported in SIMD-only mode");
11993 }
11994
emitTargetDataStandAloneCall(CodeGenFunction & CGF,const OMPExecutableDirective & D,const Expr * IfCond,const Expr * Device)11995 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11996 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11997 const Expr *Device) {
11998 llvm_unreachable("Not supported in SIMD-only mode");
11999 }
12000
emitDoacrossInit(CodeGenFunction & CGF,const OMPLoopDirective & D,ArrayRef<Expr * > NumIterations)12001 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12002 const OMPLoopDirective &D,
12003 ArrayRef<Expr *> NumIterations) {
12004 llvm_unreachable("Not supported in SIMD-only mode");
12005 }
12006
emitDoacrossOrdered(CodeGenFunction & CGF,const OMPDependClause * C)12007 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12008 const OMPDependClause *C) {
12009 llvm_unreachable("Not supported in SIMD-only mode");
12010 }
12011
12012 const VarDecl *
translateParameter(const FieldDecl * FD,const VarDecl * NativeParam) const12013 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12014 const VarDecl *NativeParam) const {
12015 llvm_unreachable("Not supported in SIMD-only mode");
12016 }
12017
12018 Address
getParameterAddress(CodeGenFunction & CGF,const VarDecl * NativeParam,const VarDecl * TargetParam) const12019 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12020 const VarDecl *NativeParam,
12021 const VarDecl *TargetParam) const {
12022 llvm_unreachable("Not supported in SIMD-only mode");
12023 }
12024