//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
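
  // A sketch (not the emitted IR verbatim) of the dispatch structure
  // UntiedTaskActionTy builds for an untied task with two switching points:
  //
  //   switch (*part_id) {            // created in Enter()
  //   default: goto .untied.done.;   // leave through the return block
  //   case 0:  goto .untied.jmp.0;   // initial entry into the body
  //   case 1:  goto .untied.jmp.1;   // added by emitUntiedSwitch()
  //   }
  //
  // Each call to emitUntiedSwitch() stores the next case index into the part
  // id, re-enters the runtime via UntiedCodeGen, and registers the
  // continuation block as a new case.
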
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
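
// Typical usage of InlinedOpenMPRegionRAII (a sketch; the emitInlinedDirective
// helpers elsewhere in this file wrap this pattern):
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical, HasCancel);
//     CodeGen(CGF); // body emitted with the inlined region info active
//   } // original CapturedStmtInfo (and lambda/block state) restored here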

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
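// Note that the construct-specific implicit-barrier values embed
// OMP_IDENT_BARRIER_IMPL: 0xC0 == 0x40 | 0x80 and 0x140 == 0x40 | 0x100, so
// testing the 0x40 bit identifies any implicit barrier regardless of the
// construct it belongs to.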

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used when no device was specified; per the spec the runtime
  /// should get it from environment variables.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes the ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
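// For reference, the default psource emitted when no location information is
// available is ";unknown;unknown;0;0;;"; with locations it takes the shape
// ";<file>;<function>;<line>;<column>;;" as described above.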

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
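// A sketch of how schedule clauses map onto these values (not exhaustive):
//   #pragma omp for schedule(static)             -> OMP_sch_static
//   #pragma omp for schedule(dynamic, 4)         -> OMP_sch_dynamic_chunked
//   #pragma omp for ordered schedule(guided)     -> OMP_ord_guided_chunked
//   #pragma omp for schedule(monotonic: runtime) -> OMP_sch_runtime
//                                                 | OMP_sch_modifier_monotonic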

/// A basic class for pre|post-actions in an advanced codegen sequence for an
/// OpenMP region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
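// In other words, this recognizes the reduction ops generated for
//   #pragma omp declare reduction(<id> : <type> : <combiner>) ...
// where the call's callee is an OpaqueValueExpr whose source expression
// references the OMPDeclareReductionDecl itself.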

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
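///
/// The emitted initialization is structurally a guarded do-while loop over
/// the elements (a sketch, not the IR verbatim):
/// \code
///   Elem *Dest = DestBegin, *End = DestBegin + NumElements;
///   if (Dest != End) {
///     do { init(*Dest); ++Dest; /* and ++Src for UDR inits */ }
///     while (Dest != End);
///   }
/// \endcode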
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

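// A quick example of the name mangling below (assuming the default host
// separators, where both FirstSeparator and Separator are "."):
// getName({"omp", "reduction"}) produces ".omp.reduction".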
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
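// The outlined function produced above has the usual kmpc microtask shape
// (a sketch; the trailing parameters depend on what the region captures):
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       /* captured variables */...);
// and is later handed to the fork entry points (__kmpc_fork_call or
// __kmpc_fork_teams) when the parallel or teams region is actually emitted.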
1289
emitParallelOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294 return emitParallelOrTeamsOutlinedFunction(
1295 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297
emitTeamsOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302 return emitParallelOrTeamsOutlinedFunction(
1303 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305
emitTaskOutlinedFunction(const OMPExecutableDirective & D,const VarDecl * ThreadIDVar,const VarDecl * PartIDVar,const VarDecl * TaskTVar,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen,bool Tied,unsigned & NumberOfParts)1306 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1307 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1308 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1309 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1310 bool Tied, unsigned &NumberOfParts) {
1311 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1312 PrePostActionTy &) {
1313 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1314 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1315 llvm::Value *TaskArgs[] = {
1316 UpLoc, ThreadID,
1317 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1318 TaskTVar->getType()->castAs<PointerType>())
1319 .getPointer(CGF)};
1320 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1321 CGM.getModule(), OMPRTL___kmpc_omp_task),
1322 TaskArgs);
1323 };
1324 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1325 UntiedCodeGen);
1326 CodeGen.setAction(Action);
1327 assert(!ThreadIDVar->getType()->isPointerType() &&
1328 "thread id variable must be of type kmp_int32 for tasks");
1329 const OpenMPDirectiveKind Region =
1330 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1331 : OMPD_task;
1332 const CapturedStmt *CS = D.getCapturedStmt(Region);
1333 bool HasCancel = false;
1334 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1335 HasCancel = TD->hasCancel();
1336 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1337 HasCancel = TD->hasCancel();
1338 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1339 HasCancel = TD->hasCancel();
1340 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1341 HasCancel = TD->hasCancel();
1342
1343 CodeGenFunction CGF(CGM, true);
1344 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1345 InnermostKind, HasCancel, Action);
1346 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1347 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1348 if (!Tied)
1349 NumberOfParts = Action.getNumberOfParts();
1350 return Res;
1351 }
1352
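/// Populate \p Fields with the constants in \p Data, adding a null value for
/// every intervening LLVM struct element (inserted by the record layout, e.g.
/// for padding) that has no corresponding FieldDecl.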
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill any padding elements with null values.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

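// The "service insert point" below is a dead bitcast instruction whose only
// purpose is to mark a stable position (at the current point, or right after
// the allocas) where runtime setup code such as the __kmpc_global_thread_num
// call can later be inserted. It is erased again by clearLocThreadIdInsertPt.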
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

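/// Build the ";file;function;line;column;;" string that is used as the
/// source-location component of an ident_t structure.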
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location.
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

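/// Build (or reuse) the ident_t location descriptor for \p Loc. Without debug
/// info, or for an invalid location, a shared default descriptor is used;
/// otherwise the descriptor carries the enclosing function name, file, line,
/// and column of the construct.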
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

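/// Get the global thread id for the current function, preferring, in order:
/// the value already cached for this function, the gtid parameter of an
/// enclosing outlined region, and finally an explicit call to
/// __kmpc_global_thread_num emitted at the service insert point and cached
/// for later uses.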
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
  // as the Clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

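// The loop runtime entry points created below follow a common naming
// convention: the suffix encodes the induction variable size in bytes (4 or
// 8), and a trailing 'u' marks the unsigned variant, e.g.
// __kmpc_for_static_init_4u.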
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      ITy,                   // lower
      ITy,                   // upper
      ITy,                   // stride
      ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as the line number associated
/// with the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc must always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).
  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

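/// For 'declare target' variables with the 'link' map type (or the 'to' map
/// type when unified shared memory is required), accesses go through a
/// compiler-created "_decl_tgt_ref_ptr" pointer instead of the variable
/// itself; return the address of that indirection pointer, creating it on
/// first use.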
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

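/// For a threadprivate variable that needs construction and/or destruction,
/// emit helper functions that initialize and destroy the per-thread copy and
/// register them via __kmpc_threadprivate_register. When no CodeGenFunction
/// is supplied, the registration is wrapped in a global initializer function
/// that is returned to the caller.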
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. This must be NULL:
    // the parameter is reserved by the runtime, which currently asserts that
    // it is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

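/// Emit the constructor/destructor helpers for a 'declare target' variable
/// definition and register them as offload entries so the device runtime can
/// run them when the corresponding image is loaded and unloaded. Returns true
/// when compiling for the device.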
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the device copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the device
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

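/// Emit code for the OpenMP 'if' clause: run \p ThenGen when the condition
/// holds and \p ElseGen otherwise. When the condition folds to a constant,
/// only the live arm is emitted.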
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

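/// Emit a 'parallel' region. Without an 'if' clause (or when it is true) this
/// forks via __kmpc_fork_call; otherwise the outlined function is invoked on
/// the current thread between __kmpc_serialized_parallel and
/// __kmpc_end_serialized_parallel.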
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    //       the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, i.e. in a regular serial code
// region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash it in a temporary, and return
// the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

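/// Return a module-private global with the given type and name, creating and
/// zero-initializing it on first use. These internal variables back runtime
/// state such as critical-section locks and threadprivate caches.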
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant=*/false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (if-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the remaining blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if (__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if (__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

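/// Emit the copy helper passed to __kmpc_copyprivate: the runtime hands it two
/// void* arrays (the destination and source pointer lists), and for each
/// copyprivate variable the helper performs the user-visible assignment from
/// the single thread's copy to another thread's copy.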
emitCopyprivateCopyFunction(CodeGenModule & CGM,llvm::Type * ArgsType,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > DestExprs,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > AssignmentOps,SourceLocation Loc)2370 static llvm::Value *emitCopyprivateCopyFunction(
2371 CodeGenModule &CGM, llvm::Type *ArgsType,
2372 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2373 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2374 SourceLocation Loc) {
2375 ASTContext &C = CGM.getContext();
2376 // void copy_func(void *LHSArg, void *RHSArg);
2377 FunctionArgList Args;
2378 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2379 ImplicitParamDecl::Other);
2380 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2381 ImplicitParamDecl::Other);
2382 Args.push_back(&LHSArg);
2383 Args.push_back(&RHSArg);
2384 const auto &CGFI =
2385 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2386 std::string Name =
2387 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2388 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2389 llvm::GlobalValue::InternalLinkage, Name,
2390 &CGM.getModule());
2391 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2392 Fn->setDoesNotRecurse();
2393 CodeGenFunction CGF(CGM);
2394 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2395 // Dest = (void*[n])(LHSArg);
2396 // Src = (void*[n])(RHSArg);
2397 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2398 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2399 ArgsType), CGF.getPointerAlign());
2400 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2401 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2402 ArgsType), CGF.getPointerAlign());
2403 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2404 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2405 // ...
2406 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2407 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2408 const auto *DestVar =
2409 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2410 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2411
2412 const auto *SrcVar =
2413 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2414 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2415
2416 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2417 QualType Type = VD->getType();
2418 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2419 }
2420 CGF.FinishFunction();
2421 return Fn;
2422 }
2423
emitSingleRegion(CodeGenFunction & CGF,const RegionCodeGenTy & SingleOpGen,SourceLocation Loc,ArrayRef<const Expr * > CopyprivateVars,ArrayRef<const Expr * > SrcExprs,ArrayRef<const Expr * > DstExprs,ArrayRef<const Expr * > AssignmentOps)2424 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2425 const RegionCodeGenTy &SingleOpGen,
2426 SourceLocation Loc,
2427 ArrayRef<const Expr *> CopyprivateVars,
2428 ArrayRef<const Expr *> SrcExprs,
2429 ArrayRef<const Expr *> DstExprs,
2430 ArrayRef<const Expr *> AssignmentOps) {
2431 if (!CGF.HaveInsertPoint())
2432 return;
2433 assert(CopyprivateVars.size() == SrcExprs.size() &&
2434 CopyprivateVars.size() == DstExprs.size() &&
2435 CopyprivateVars.size() == AssignmentOps.size());
2436 ASTContext &C = CGM.getContext();
2437 // int32 did_it = 0;
2438 // if(__kmpc_single(ident_t *, gtid)) {
2439 // SingleOpGen();
2440 // __kmpc_end_single(ident_t *, gtid);
2441 // did_it = 1;
2442 // }
2443 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2444 // <copy_func>, did_it);
2445
2446 Address DidIt = Address::invalid();
2447 if (!CopyprivateVars.empty()) {
2448 // int32 did_it = 0;
2449 QualType KmpInt32Ty =
2450 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2451 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2452 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2453 }
2454 // Prepare arguments and build a call to __kmpc_single
2455 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2456 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2457 CGM.getModule(), OMPRTL___kmpc_single),
2458 Args,
2459 OMPBuilder.getOrCreateRuntimeFunction(
2460 CGM.getModule(), OMPRTL___kmpc_end_single),
2461 Args,
2462 /*Conditional=*/true);
2463 SingleOpGen.setAction(Action);
2464 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2465 if (DidIt.isValid()) {
2466 // did_it = 1;
2467 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2468 }
2469 Action.Done(CGF);
2470 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2471 // <copy_func>, did_it);
2472 if (DidIt.isValid()) {
2473 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2474 QualType CopyprivateArrayTy = C.getConstantArrayType(
2475 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2476 /*IndexTypeQuals=*/0);
2477 // Create a list of all private variables for copyprivate.
2478 Address CopyprivateList =
2479 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2480 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2481 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2482 CGF.Builder.CreateStore(
2483 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2484 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2485 CGF.VoidPtrTy),
2486 Elem);
2487 }
2488 // Build function that copies private values from single region to all other
2489 // threads in the corresponding parallel region.
2490 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2491 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2492 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2493 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2494 Address CL =
2495 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2496 CGF.VoidPtrTy);
2497 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2498 llvm::Value *Args[] = {
2499 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2500 getThreadID(CGF, Loc), // i32 <gtid>
2501 BufSize, // size_t <buf_size>
2502 CL.getPointer(), // void *<copyprivate list>
2503 CpyFn, // void (*) (void *, void *) <copy_func>
2504 DidItVal // i32 did_it
2505 };
2506 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2507 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2508 Args);
2509 }
2510 }
2511
emitOrderedRegion(CodeGenFunction & CGF,const RegionCodeGenTy & OrderedOpGen,SourceLocation Loc,bool IsThreads)2512 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2513 const RegionCodeGenTy &OrderedOpGen,
2514 SourceLocation Loc, bool IsThreads) {
2515 if (!CGF.HaveInsertPoint())
2516 return;
2517 // __kmpc_ordered(ident_t *, gtid);
2518 // OrderedOpGen();
2519 // __kmpc_end_ordered(ident_t *, gtid);
2520 // Prepare arguments and build a call to __kmpc_ordered
2521 if (IsThreads) {
2522 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2523 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2524 CGM.getModule(), OMPRTL___kmpc_ordered),
2525 Args,
2526 OMPBuilder.getOrCreateRuntimeFunction(
2527 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2528 Args);
2529 OrderedOpGen.setAction(Action);
2530 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2531 return;
2532 }
2533 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2534 }
2535
getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)2536 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2537 unsigned Flags;
2538 if (Kind == OMPD_for)
2539 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2540 else if (Kind == OMPD_sections)
2541 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2542 else if (Kind == OMPD_single)
2543 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2544 else if (Kind == OMPD_barrier)
2545 Flags = OMP_IDENT_BARRIER_EXPL;
2546 else
2547 Flags = OMP_IDENT_BARRIER_IMPL;
2548 return Flags;
2549 }
2550
getDefaultScheduleAndChunk(CodeGenFunction & CGF,const OMPLoopDirective & S,OpenMPScheduleClauseKind & ScheduleKind,const Expr * & ChunkExpr) const2551 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2552 CodeGenFunction &CGF, const OMPLoopDirective &S,
2553 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2554 // Check if the loop directive is actually a doacross loop directive. In this
2555 // case choose static, 1 schedule.
2556 if (llvm::any_of(
2557 S.getClausesOfKind<OMPOrderedClause>(),
2558 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2559 ScheduleKind = OMPC_SCHEDULE_static;
2560 // Chunk size is 1 in this case.
2561 llvm::APInt ChunkSize(32, 1);
2562 ChunkExpr = IntegerLiteral::Create(
2563 CGF.getContext(), ChunkSize,
2564 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2565 SourceLocation());
2566 }
2567 }
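// Example of the doacross case handled above (sketch):
//   #pragma omp for ordered(2)
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j) { ... }
// The ordered clause carries a loop count, so the schedule defaults to the
// equivalent of schedule(static, 1).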
2568
2569 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2570 OpenMPDirectiveKind Kind, bool EmitChecks,
2571 bool ForceSimpleCall) {
2572 // Check if we should use the OMPBuilder
2573 auto *OMPRegionInfo =
2574 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2575 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2576 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2577 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2578 return;
2579 }
2580
2581 if (!CGF.HaveInsertPoint())
2582 return;
2583 // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
2584 // __kmpc_barrier(loc, thread_id).
2585 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2588 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2589 getThreadID(CGF, Loc)};
2590 if (OMPRegionInfo) {
2591 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2592 llvm::Value *Result = CGF.EmitRuntimeCall(
2593 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2594 OMPRTL___kmpc_cancel_barrier),
2595 Args);
2596 if (EmitChecks) {
2597 // if (__kmpc_cancel_barrier()) {
2598 // exit from construct;
2599 // }
2600 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2601 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2602 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2603 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2604 CGF.EmitBlock(ExitBB);
2605 // exit from construct;
2606 CodeGenFunction::JumpDest CancelDestination =
2607 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2608 CGF.EmitBranchThroughCleanup(CancelDestination);
2609 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2610 }
2611 return;
2612 }
2613 }
2614 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2615 CGM.getModule(), OMPRTL___kmpc_barrier),
2616 Args);
2617 }
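// Illustrative sketch of the cancellation-aware path above (pseudo-code; the
// actual control flow uses the .cancel.exit/.cancel.continue blocks):
// \code
//   if (__kmpc_cancel_barrier(&<loc>, <gtid>) != 0)
//     goto <cancel destination>; // branches through enclosing cleanups
//   // .cancel.continue: normal fall-through
// \endcode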
2618
2619 /// Map the OpenMP loop schedule to the runtime enumeration.
2620 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2621 bool Chunked, bool Ordered) {
2622 switch (ScheduleKind) {
2623 case OMPC_SCHEDULE_static:
2624 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2625 : (Ordered ? OMP_ord_static : OMP_sch_static);
2626 case OMPC_SCHEDULE_dynamic:
2627 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2628 case OMPC_SCHEDULE_guided:
2629 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2630 case OMPC_SCHEDULE_runtime:
2631 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2632 case OMPC_SCHEDULE_auto:
2633 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2634 case OMPC_SCHEDULE_unknown:
2635 assert(!Chunked && "chunk was specified but schedule kind not known");
2636 return Ordered ? OMP_ord_static : OMP_sch_static;
2637 }
2638 llvm_unreachable("Unexpected runtime schedule");
2639 }
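// For example, schedule(dynamic) maps to OMP_sch_dynamic_chunked (dynamic is
// always treated as chunked; a default chunk of 1 is supplied later), while
// schedule(static, N) maps to OMP_sch_static_chunked, or to
// OMP_ord_static_chunked inside an ordered loop.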
2640
2641 /// Map the OpenMP distribute schedule to the runtime enumeration.
2642 static OpenMPSchedType
2643 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2644 // only static is allowed for dist_schedule
2645 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2646 }
2647
2648 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2649 bool Chunked) const {
2650 OpenMPSchedType Schedule =
2651 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2652 return Schedule == OMP_sch_static;
2653 }
2654
2655 bool CGOpenMPRuntime::isStaticNonchunked(
2656 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2657 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2658 return Schedule == OMP_dist_sch_static;
2659 }
2660
2661 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2662 bool Chunked) const {
2663 OpenMPSchedType Schedule =
2664 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2665 return Schedule == OMP_sch_static_chunked;
2666 }
2667
2668 bool CGOpenMPRuntime::isStaticChunked(
2669 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2670 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2671 return Schedule == OMP_dist_sch_static_chunked;
2672 }
2673
2674 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2675 OpenMPSchedType Schedule =
2676 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2677 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2678 return Schedule != OMP_sch_static;
2679 }
2680
2681 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2682 OpenMPScheduleClauseModifier M1,
2683 OpenMPScheduleClauseModifier M2) {
2684 int Modifier = 0;
2685 switch (M1) {
2686 case OMPC_SCHEDULE_MODIFIER_monotonic:
2687 Modifier = OMP_sch_modifier_monotonic;
2688 break;
2689 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2690 Modifier = OMP_sch_modifier_nonmonotonic;
2691 break;
2692 case OMPC_SCHEDULE_MODIFIER_simd:
2693 if (Schedule == OMP_sch_static_chunked)
2694 Schedule = OMP_sch_static_balanced_chunked;
2695 break;
2696 case OMPC_SCHEDULE_MODIFIER_last:
2697 case OMPC_SCHEDULE_MODIFIER_unknown:
2698 break;
2699 }
2700 switch (M2) {
2701 case OMPC_SCHEDULE_MODIFIER_monotonic:
2702 Modifier = OMP_sch_modifier_monotonic;
2703 break;
2704 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2705 Modifier = OMP_sch_modifier_nonmonotonic;
2706 break;
2707 case OMPC_SCHEDULE_MODIFIER_simd:
2708 if (Schedule == OMP_sch_static_chunked)
2709 Schedule = OMP_sch_static_balanced_chunked;
2710 break;
2711 case OMPC_SCHEDULE_MODIFIER_last:
2712 case OMPC_SCHEDULE_MODIFIER_unknown:
2713 break;
2714 }
2715 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2716 // If the static schedule kind is specified or if the ordered clause is
2717 // specified, and if the nonmonotonic modifier is not specified, the effect is
2718 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2719 // modifier is specified, the effect is as if the nonmonotonic modifier is
2720 // specified.
2721 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2722 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2723 Schedule == OMP_sch_static_balanced_chunked ||
2724 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2725 Schedule == OMP_dist_sch_static_chunked ||
2726 Schedule == OMP_dist_sch_static))
2727 Modifier = OMP_sch_modifier_nonmonotonic;
2728 }
2729 return Schedule | Modifier;
2730 }
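// The result packs the modifier bits into the schedule value. E.g. under
// OpenMP >= 5.0 an unmodified schedule(dynamic) yields
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// while the static schedules are left unmodified (monotonic by default).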
2731
2732 void CGOpenMPRuntime::emitForDispatchInit(
2733 CodeGenFunction &CGF, SourceLocation Loc,
2734 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2735 bool Ordered, const DispatchRTInput &DispatchValues) {
2736 if (!CGF.HaveInsertPoint())
2737 return;
2738 OpenMPSchedType Schedule = getRuntimeSchedule(
2739 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2740 assert(Ordered ||
2741 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2742 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2743 Schedule != OMP_sch_static_balanced_chunked));
2744 // Call __kmpc_dispatch_init(
2745 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2746 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2747 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2748
2749 // If the chunk was not specified in the clause, use the default value 1.
2750 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2751 : CGF.Builder.getIntN(IVSize, 1);
2752 llvm::Value *Args[] = {
2753 emitUpdateLocation(CGF, Loc),
2754 getThreadID(CGF, Loc),
2755 CGF.Builder.getInt32(addMonoNonMonoModifier(
2756 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2757 DispatchValues.LB, // Lower
2758 DispatchValues.UB, // Upper
2759 CGF.Builder.getIntN(IVSize, 1), // Stride
2760 Chunk // Chunk
2761 };
2762 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2763 }
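// Illustrative sketch (placeholders in <>): for
//   #pragma omp for schedule(dynamic, 4)
// with a 32-bit signed induction variable this emits roughly
// \code
//   __kmpc_dispatch_init_4(&<loc>, <gtid>, <schedule-with-modifiers>,
//                          <lb>, <ub>, /*Stride=*/1, /*Chunk=*/4);
// \endcode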
2764
2765 static void emitForStaticInitCall(
2766 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2767 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2768 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2769 const CGOpenMPRuntime::StaticRTInput &Values) {
2770 if (!CGF.HaveInsertPoint())
2771 return;
2772
2773 assert(!Values.Ordered);
2774 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2775 Schedule == OMP_sch_static_balanced_chunked ||
2776 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2777 Schedule == OMP_dist_sch_static ||
2778 Schedule == OMP_dist_sch_static_chunked);
2779
2780 // Call __kmpc_for_static_init(
2781 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2782 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2783 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2784 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2785 llvm::Value *Chunk = Values.Chunk;
2786 if (Chunk == nullptr) {
2787 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2788 Schedule == OMP_dist_sch_static) &&
2789 "expected static non-chunked schedule");
2790 // If the chunk was not specified in the clause, use the default value 1.
2791 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2792 } else {
2793 assert((Schedule == OMP_sch_static_chunked ||
2794 Schedule == OMP_sch_static_balanced_chunked ||
2795 Schedule == OMP_ord_static_chunked ||
2796 Schedule == OMP_dist_sch_static_chunked) &&
2797 "expected static chunked schedule");
2798 }
2799 llvm::Value *Args[] = {
2800 UpdateLocation,
2801 ThreadId,
2802 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2803 M2)), // Schedule type
2804 Values.IL.getPointer(), // &isLastIter
2805 Values.LB.getPointer(), // &LB
2806 Values.UB.getPointer(), // &UB
2807 Values.ST.getPointer(), // &Stride
2808 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2809 Chunk // Chunk
2810 };
2811 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2812 }
2813
2814 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2815 SourceLocation Loc,
2816 OpenMPDirectiveKind DKind,
2817 const OpenMPScheduleTy &ScheduleKind,
2818 const StaticRTInput &Values) {
2819 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2820 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2821 assert(isOpenMPWorksharingDirective(DKind) &&
2822 "Expected loop-based or sections-based directive.");
2823 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2824 isOpenMPLoopDirective(DKind)
2825 ? OMP_IDENT_WORK_LOOP
2826 : OMP_IDENT_WORK_SECTIONS);
2827 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2828 llvm::FunctionCallee StaticInitFunction =
2829 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2830 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2831 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2832 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2833 }
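// Illustrative sketch (placeholders in <>): for
//   #pragma omp for schedule(static)
// with a 32-bit signed induction variable this emits roughly
// \code
//   __kmpc_for_static_init_4(&<loc>, <gtid>, OMP_sch_static, &is_last,
//                            &lb, &ub, &stride, /*Incr=*/1, /*Chunk=*/1);
// \endcode
// after which the thread's bounds are read back from lb/ub/stride.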
2834
2835 void CGOpenMPRuntime::emitDistributeStaticInit(
2836 CodeGenFunction &CGF, SourceLocation Loc,
2837 OpenMPDistScheduleClauseKind SchedKind,
2838 const CGOpenMPRuntime::StaticRTInput &Values) {
2839 OpenMPSchedType ScheduleNum =
2840 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2841 llvm::Value *UpdatedLocation =
2842 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2843 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2844 llvm::FunctionCallee StaticInitFunction =
2845 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2846 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2847 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2848 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2849 }
2850
2851 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2852 SourceLocation Loc,
2853 OpenMPDirectiveKind DKind) {
2854 if (!CGF.HaveInsertPoint())
2855 return;
2856 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2857 llvm::Value *Args[] = {
2858 emitUpdateLocation(CGF, Loc,
2859 isOpenMPDistributeDirective(DKind)
2860 ? OMP_IDENT_WORK_DISTRIBUTE
2861 : isOpenMPLoopDirective(DKind)
2862 ? OMP_IDENT_WORK_LOOP
2863 : OMP_IDENT_WORK_SECTIONS),
2864 getThreadID(CGF, Loc)};
2865 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2866 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2867 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2868 Args);
2869 }
2870
2871 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2872 SourceLocation Loc,
2873 unsigned IVSize,
2874 bool IVSigned) {
2875 if (!CGF.HaveInsertPoint())
2876 return;
2877 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2878 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2879 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2880 }
2881
2882 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2883 SourceLocation Loc, unsigned IVSize,
2884 bool IVSigned, Address IL,
2885 Address LB, Address UB,
2886 Address ST) {
2887 // Call __kmpc_dispatch_next(
2888 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2889 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2890 // kmp_int[32|64] *p_stride);
2891 llvm::Value *Args[] = {
2892 emitUpdateLocation(CGF, Loc),
2893 getThreadID(CGF, Loc),
2894 IL.getPointer(), // &isLastIter
2895 LB.getPointer(), // &Lower
2896 UB.getPointer(), // &Upper
2897 ST.getPointer() // &Stride
2898 };
2899 llvm::Value *Call =
2900 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2901 return CGF.EmitScalarConversion(
2902 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2903 CGF.getContext().BoolTy, Loc);
2904 }
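// Callers typically drive a dispatch loop on the returned flag, roughly:
// \code
//   while (__kmpc_dispatch_next_4(&<loc>, <gtid>, &is_last, &lb, &ub, &st)) {
//     for (iv = lb; iv <= ub; iv += st)
//       body(iv);
//   }
// \endcode
// (sketch only; the real loop structure is built by the loop emitters).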
2905
2906 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2907 llvm::Value *NumThreads,
2908 SourceLocation Loc) {
2909 if (!CGF.HaveInsertPoint())
2910 return;
2911 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2912 llvm::Value *Args[] = {
2913 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2914 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2915 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2916 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2917 Args);
2918 }
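// E.g. for '#pragma omp parallel num_threads(N)' this emits (sketch)
//   __kmpc_push_num_threads(&<loc>, <gtid>, (kmp_int32)N);
// just before the runtime call that forks the parallel region.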
2919
2920 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2921 ProcBindKind ProcBind,
2922 SourceLocation Loc) {
2923 if (!CGF.HaveInsertPoint())
2924 return;
2925 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2926 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2927 llvm::Value *Args[] = {
2928 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2929 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2930 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2931 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2932 Args);
2933 }
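// E.g. for '#pragma omp parallel proc_bind(close)' this emits (sketch)
//   __kmpc_push_proc_bind(&<loc>, <gtid>, (int)OMP_PROC_BIND_close);
// ahead of the fork of the parallel region.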
2934
2935 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2936 SourceLocation Loc, llvm::AtomicOrdering AO) {
2937 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2938 OMPBuilder.createFlush(CGF.Builder);
2939 } else {
2940 if (!CGF.HaveInsertPoint())
2941 return;
2942 // Build call void __kmpc_flush(ident_t *loc)
2943 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2944 CGM.getModule(), OMPRTL___kmpc_flush),
2945 emitUpdateLocation(CGF, Loc));
2946 }
2947 }
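// E.g. '#pragma omp flush' becomes the single call
//   __kmpc_flush(&<loc>);
// unless the OpenMPIRBuilder is enabled, in which case the builder creates
// the equivalent flush itself.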
2948
2949 namespace {
2950 /// Indexes of fields for type kmp_task_t.
2951 enum KmpTaskTFields {
2952 /// List of shared variables.
2953 KmpTaskTShareds,
2954 /// Task routine.
2955 KmpTaskTRoutine,
2956 /// Partition id for the untied tasks.
2957 KmpTaskTPartId,
2958 /// Function with call of destructors for private variables.
2959 Data1,
2960 /// Task priority.
2961 Data2,
2962 /// (Taskloops only) Lower bound.
2963 KmpTaskTLowerBound,
2964 /// (Taskloops only) Upper bound.
2965 KmpTaskTUpperBound,
2966 /// (Taskloops only) Stride.
2967 KmpTaskTStride,
2968 /// (Taskloops only) Is last iteration flag.
2969 KmpTaskTLastIter,
2970 /// (Taskloops only) Reduction data.
2971 KmpTaskTReductions,
2972 };
2973 } // anonymous namespace
2974
2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2976 return OffloadEntriesTargetRegion.empty() &&
2977 OffloadEntriesDeviceGlobalVar.empty();
2978 }
2979
2980 /// Initialize target region entry.
2981 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2982 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2983 StringRef ParentName, unsigned LineNum,
2984 unsigned Order) {
2985 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2986 "only required for the device "
2987 "code generation.");
2988 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2989 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2990 OMPTargetRegionEntryTargetRegion);
2991 ++OffloadingEntriesNum;
2992 }
2993
2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2995 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2996 StringRef ParentName, unsigned LineNum,
2997 llvm::Constant *Addr, llvm::Constant *ID,
2998 OMPTargetRegionEntryKind Flags) {
2999 // If we are emitting code for a target, the entry is already initialized and
3000 // only has to be registered.
3001 if (CGM.getLangOpts().OpenMPIsDevice) {
3002 // This could happen if the device compilation is invoked standalone.
3003 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3004 return;
3005 auto &Entry =
3006 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3007 Entry.setAddress(Addr);
3008 Entry.setID(ID);
3009 Entry.setFlags(Flags);
3010 } else {
3011 if (Flags ==
3012 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3013 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3014 /*IgnoreAddressId*/ true))
3015 return;
3016 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3017 "Target region entry already registered!");
3018 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3019 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3020 ++OffloadingEntriesNum;
3021 }
3022 }
3023
3024 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3025 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3026 bool IgnoreAddressId) const {
3027 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3028 if (PerDevice == OffloadEntriesTargetRegion.end())
3029 return false;
3030 auto PerFile = PerDevice->second.find(FileID);
3031 if (PerFile == PerDevice->second.end())
3032 return false;
3033 auto PerParentName = PerFile->second.find(ParentName);
3034 if (PerParentName == PerFile->second.end())
3035 return false;
3036 auto PerLine = PerParentName->second.find(LineNum);
3037 if (PerLine == PerParentName->second.end())
3038 return false;
3039 // Fail if this entry is already registered.
3040 if (!IgnoreAddressId &&
3041 (PerLine->second.getAddress() || PerLine->second.getID()))
3042 return false;
3043 return true;
3044 }
3045
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3047 const OffloadTargetRegionEntryInfoActTy &Action) {
3048 // Scan all target region entries and perform the provided action.
3049 for (const auto &D : OffloadEntriesTargetRegion)
3050 for (const auto &F : D.second)
3051 for (const auto &P : F.second)
3052 for (const auto &L : P.second)
3053 Action(D.first, F.first, P.first(), L.first, L.second);
3054 }
3055
3056 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3057 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3058 OMPTargetGlobalVarEntryKind Flags,
3059 unsigned Order) {
3060 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3061 "only required for the device "
3062 "code generation.");
3063 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3064 ++OffloadingEntriesNum;
3065 }
3066
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3068 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3069 CharUnits VarSize,
3070 OMPTargetGlobalVarEntryKind Flags,
3071 llvm::GlobalValue::LinkageTypes Linkage) {
3072 if (CGM.getLangOpts().OpenMPIsDevice) {
3073 // This could happen if the device compilation is invoked standalone.
3074 if (!hasDeviceGlobalVarEntryInfo(VarName))
3075 return;
3076 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3077 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3078 if (Entry.getVarSize().isZero()) {
3079 Entry.setVarSize(VarSize);
3080 Entry.setLinkage(Linkage);
3081 }
3082 return;
3083 }
3084 Entry.setVarSize(VarSize);
3085 Entry.setLinkage(Linkage);
3086 Entry.setAddress(Addr);
3087 } else {
3088 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3089 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3090 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3091 "Entry not initialized!");
3092 if (Entry.getVarSize().isZero()) {
3093 Entry.setVarSize(VarSize);
3094 Entry.setLinkage(Linkage);
3095 }
3096 return;
3097 }
3098 OffloadEntriesDeviceGlobalVar.try_emplace(
3099 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3100 ++OffloadingEntriesNum;
3101 }
3102 }
3103
3104 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3105 actOnDeviceGlobalVarEntriesInfo(
3106 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3107 // Scan all device global variable entries and perform the provided action.
3108 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3109 Action(E.getKey(), E.getValue());
3110 }
3111
3112 void CGOpenMPRuntime::createOffloadEntry(
3113 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3114 llvm::GlobalValue::LinkageTypes Linkage) {
3115 StringRef Name = Addr->getName();
3116 llvm::Module &M = CGM.getModule();
3117 llvm::LLVMContext &C = M.getContext();
3118
3119 // Create constant string with the name.
3120 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3121
3122 std::string StringName = getName({"omp_offloading", "entry_name"});
3123 auto *Str = new llvm::GlobalVariable(
3124 M, StrPtrInit->getType(), /*isConstant=*/true,
3125 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3126 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3127
3128 llvm::Constant *Data[] = {
3129 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3130 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3131 llvm::ConstantInt::get(CGM.SizeTy, Size),
3132 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3133 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3134 std::string EntryName = getName({"omp_offloading", "entry", ""});
3135 llvm::GlobalVariable *Entry = createGlobalStruct(
3136 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3137 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3138
3139 // The entry has to be created in the section the linker expects it to be.
3140 Entry->setSection("omp_offloading_entries");
3141 }
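// Sketched as IR, the net effect is two globals per entry (names and the
// constants in <> are illustrative, not the exact mangling):
// \code
//   @.omp_offloading.entry_name = internal unnamed_addr constant [N x i8] c"<name>\00"
//   @.omp_offloading.entry.<name> = weak constant %struct.__tgt_offload_entry {
//       i8* <ID>, i8* @.omp_offloading.entry_name, i64 <size>, i32 <flags>,
//       i32 0 }, section "omp_offloading_entries"
// \endcode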
3142
3143 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3144 // Emit the offloading entries and metadata so that the device codegen side
3145 // can easily figure out what to emit. The produced metadata looks like
3146 // this:
3147 //
3148 // !omp_offload.info = !{!1, ...}
3149 //
3150 // Right now we only generate metadata for functions that contain target
3151 // regions.
3152
3153 // If we are in simd mode or there are no entries, we don't need to do
3154 // anything.
3155 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3156 return;
3157
3158 llvm::Module &M = CGM.getModule();
3159 llvm::LLVMContext &C = M.getContext();
3160 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3161 SourceLocation, StringRef>,
3162 16>
3163 OrderedEntries(OffloadEntriesInfoManager.size());
3164 llvm::SmallVector<StringRef, 16> ParentFunctions(
3165 OffloadEntriesInfoManager.size());
3166
3167 // Auxiliary methods to create metadata values and strings.
3168 auto &&GetMDInt = [this](unsigned V) {
3169 return llvm::ConstantAsMetadata::get(
3170 llvm::ConstantInt::get(CGM.Int32Ty, V));
3171 };
3172
3173 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3174
3175 // Create the offloading info metadata node.
3176 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3177
3178 // Create a function that emits metadata for each target region entry.
3179 auto &&TargetRegionMetadataEmitter =
3180 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3181 &GetMDString](
3182 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3183 unsigned Line,
3184 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3185 // Generate metadata for target regions. Each entry of this metadata
3186 // contains:
3187 // - Entry 0 -> Kind of this type of metadata (0).
3188 // - Entry 1 -> Device ID of the file where the entry was identified.
3189 // - Entry 2 -> File ID of the file where the entry was identified.
3190 // - Entry 3 -> Mangled name of the function where the entry was
3191 // identified.
3192 // - Entry 4 -> Line in the file where the entry was identified.
3193 // - Entry 5 -> Order the entry was created.
3194 // The first element of the metadata node is the kind.
3195 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3196 GetMDInt(FileID), GetMDString(ParentName),
3197 GetMDInt(Line), GetMDInt(E.getOrder())};
3198
3199 SourceLocation Loc;
3200 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3201 E = CGM.getContext().getSourceManager().fileinfo_end();
3202 I != E; ++I) {
3203 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3204 I->getFirst()->getUniqueID().getFile() == FileID) {
3205 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3206 I->getFirst(), Line, 1);
3207 break;
3208 }
3209 }
3210 // Save this entry in the right position of the ordered entries array.
3211 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3212 ParentFunctions[E.getOrder()] = ParentName;
3213
3214 // Add metadata to the named metadata node.
3215 MD->addOperand(llvm::MDNode::get(C, Ops));
3216 };
3217
3218 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3219 TargetRegionMetadataEmitter);
3220
3221 // Create a function that emits metadata for each device global variable entry.
3222 auto &&DeviceGlobalVarMetadataEmitter =
3223 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3224 MD](StringRef MangledName,
3225 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3226 &E) {
3227 // Generate metadata for global variables. Each entry of this metadata
3228 // contains:
3229 // - Entry 0 -> Kind of this type of metadata (1).
3230 // - Entry 1 -> Mangled name of the variable.
3231 // - Entry 2 -> Declare target kind.
3232 // - Entry 3 -> Order the entry was created.
3233 // The first element of the metadata node is the kind.
3234 llvm::Metadata *Ops[] = {
3235 GetMDInt(E.getKind()), GetMDString(MangledName),
3236 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3237
3238 // Save this entry in the right position of the ordered entries array.
3239 OrderedEntries[E.getOrder()] =
3240 std::make_tuple(&E, SourceLocation(), MangledName);
3241
3242 // Add metadata to the named metadata node.
3243 MD->addOperand(llvm::MDNode::get(C, Ops));
3244 };
3245
3246 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3247 DeviceGlobalVarMetadataEmitter);
3248
3249 for (const auto &E : OrderedEntries) {
3250 assert(std::get<0>(E) && "All ordered entries must exist!");
3251 if (const auto *CE =
3252 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3253 std::get<0>(E))) {
3254 if (!CE->getID() || !CE->getAddress()) {
3255 // Do not blame the entry if the parent function is not emitted.
3256 StringRef FnName = ParentFunctions[CE->getOrder()];
3257 if (!CGM.GetGlobalValue(FnName))
3258 continue;
3259 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3260 DiagnosticsEngine::Error,
3261 "Offloading entry for target region in %0 is incorrect: either the "
3262 "address or the ID is invalid.");
3263 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3264 continue;
3265 }
3266 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3267 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3268 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3269 OffloadEntryInfoDeviceGlobalVar>(
3270 std::get<0>(E))) {
3271 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3272 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3273 CE->getFlags());
3274 switch (Flags) {
3275 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3276 if (CGM.getLangOpts().OpenMPIsDevice &&
3277 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3278 continue;
3279 if (!CE->getAddress()) {
3280 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3281 DiagnosticsEngine::Error, "Offloading entry for declare target "
3282 "variable %0 is incorrect: the "
3283 "address is invalid.");
3284 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3285 continue;
3286 }
3287 // The variable has no definition - no need to add the entry.
3288 if (CE->getVarSize().isZero())
3289 continue;
3290 break;
3291 }
3292 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3293 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3294 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3295 "Declare target link address is set.");
3296 if (CGM.getLangOpts().OpenMPIsDevice)
3297 continue;
3298 if (!CE->getAddress()) {
3299 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3300 DiagnosticsEngine::Error,
3301 "Offloading entry for declare target variable is incorrect: the "
3302 "address is invalid.");
3303 CGM.getDiags().Report(DiagID);
3304 continue;
3305 }
3306 break;
3307 }
3308 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3309 CE->getVarSize().getQuantity(), Flags,
3310 CE->getLinkage());
3311 } else {
3312 llvm_unreachable("Unsupported entry kind.");
3313 }
3314 }
3315 }
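// A host module produced by this function carries metadata of the form
// (operand values are illustrative):
// \code
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>", i32 <line>, i32 <order>}
//   !1 = !{i32 1, !"<mangled-var-name>", i32 <declare-target-flags>, i32 <order>}
// \endcode
// loadOffloadInfoMetadata() below parses exactly this layout on the device
// side.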
3316
3317 /// Loads all the offload entries information from the host IR
3318 /// metadata.
3319 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3320 // If we are in target mode, load the metadata from the host IR. This code has
3321 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3322
3323 if (!CGM.getLangOpts().OpenMPIsDevice)
3324 return;
3325
3326 if (CGM.getLangOpts().OMPHostIRFile.empty())
3327 return;
3328
3329 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3330 if (auto EC = Buf.getError()) {
3331 CGM.getDiags().Report(diag::err_cannot_open_file)
3332 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3333 return;
3334 }
3335
3336 llvm::LLVMContext C;
3337 auto ME = expectedToErrorOrAndEmitErrors(
3338 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3339
3340 if (auto EC = ME.getError()) {
3341 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3342 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3343 CGM.getDiags().Report(DiagID)
3344 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3345 return;
3346 }
3347
3348 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3349 if (!MD)
3350 return;
3351
3352 for (llvm::MDNode *MN : MD->operands()) {
3353 auto &&GetMDInt = [MN](unsigned Idx) {
3354 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3355 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3356 };
3357
3358 auto &&GetMDString = [MN](unsigned Idx) {
3359 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3360 return V->getString();
3361 };
3362
3363 switch (GetMDInt(0)) {
3364 default:
3365 llvm_unreachable("Unexpected metadata!");
3366 break;
3367 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3368 OffloadingEntryInfoTargetRegion:
3369 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3370 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3371 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3372 /*Order=*/GetMDInt(5));
3373 break;
3374 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3375 OffloadingEntryInfoDeviceGlobalVar:
3376 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3377 /*MangledName=*/GetMDString(1),
3378 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3379 /*Flags=*/GetMDInt(2)),
3380 /*Order=*/GetMDInt(3));
3381 break;
3382 }
3383 }
3384 }
3385
3386 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3387 if (!KmpRoutineEntryPtrTy) {
3388 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3389 ASTContext &C = CGM.getContext();
3390 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3391 FunctionProtoType::ExtProtoInfo EPI;
3392 KmpRoutineEntryPtrQTy = C.getPointerType(
3393 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3394 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3395 }
3396 }
3397
3398 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3399 // Make sure the type of the entry is already created. This is the type we
3400 // have to create:
3401 // struct __tgt_offload_entry{
3402 // void *addr; // Pointer to the offload entry info.
3403 // // (function or global)
3404 // char *name; // Name of the function or global.
3405 // size_t size; // Size of the entry info (0 if it is a function).
3406 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3407 // int32_t reserved; // Reserved, to use by the runtime library.
3408 // };
3409 if (TgtOffloadEntryQTy.isNull()) {
3410 ASTContext &C = CGM.getContext();
3411 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3412 RD->startDefinition();
3413 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3414 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3415 addFieldToRecordDecl(C, RD, C.getSizeType());
3416 addFieldToRecordDecl(
3417 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3418 addFieldToRecordDecl(
3419 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3420 RD->completeDefinition();
3421 RD->addAttr(PackedAttr::CreateImplicit(C));
3422 TgtOffloadEntryQTy = C.getRecordType(RD);
3423 }
3424 return TgtOffloadEntryQTy;
3425 }
3426
3427 namespace {
3428 struct PrivateHelpersTy {
3429 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3430 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3431 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3432 PrivateElemInit(PrivateElemInit) {}
3433 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3434 const Expr *OriginalRef = nullptr;
3435 const VarDecl *Original = nullptr;
3436 const VarDecl *PrivateCopy = nullptr;
3437 const VarDecl *PrivateElemInit = nullptr;
3438 bool isLocalPrivate() const {
3439 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3440 }
3441 };
3442 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3443 } // anonymous namespace
3444
3445 static bool isAllocatableDecl(const VarDecl *VD) {
3446 const VarDecl *CVD = VD->getCanonicalDecl();
3447 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3448 return false;
3449 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3450 // Use the default allocation.
3451 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3452 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3453 !AA->getAllocator());
3454 }
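// Example of a declaration for which this returns true (sketch):
//   int x;
//   #pragma omp allocate(x) allocator(omp_large_cap_mem_alloc)
// A plain allocate with the default or null allocator and no allocator
// expression keeps the default allocation, so the function returns false.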
3455
3456 static RecordDecl *
3457 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3458 if (!Privates.empty()) {
3459 ASTContext &C = CGM.getContext();
3460 // Build struct .kmp_privates_t. {
3461 // /* private vars */
3462 // };
3463 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3464 RD->startDefinition();
3465 for (const auto &Pair : Privates) {
3466 const VarDecl *VD = Pair.second.Original;
3467 QualType Type = VD->getType().getNonReferenceType();
3468 // If the private variable is a local variable with lvalue ref type,
3469 // allocate the pointer instead of the pointee type.
3470 if (Pair.second.isLocalPrivate()) {
3471 if (VD->getType()->isLValueReferenceType())
3472 Type = C.getPointerType(Type);
3473 if (isAllocatableDecl(VD))
3474 Type = C.getPointerType(Type);
3475 }
3476 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3477 if (VD->hasAttrs()) {
3478 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3479 E(VD->getAttrs().end());
3480 I != E; ++I)
3481 FD->addAttr(*I);
3482 }
3483 }
3484 RD->completeDefinition();
3485 return RD;
3486 }
3487 return nullptr;
3488 }
3489
3490 static RecordDecl *
3491 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3492 QualType KmpInt32Ty,
3493 QualType KmpRoutineEntryPointerQTy) {
3494 ASTContext &C = CGM.getContext();
3495 // Build struct kmp_task_t {
3496 // void * shareds;
3497 // kmp_routine_entry_t routine;
3498 // kmp_int32 part_id;
3499 // kmp_cmplrdata_t data1;
3500 // kmp_cmplrdata_t data2;
3501 // For taskloops additional fields:
3502 // kmp_uint64 lb;
3503 // kmp_uint64 ub;
3504 // kmp_int64 st;
3505 // kmp_int32 liter;
3506 // void * reductions;
3507 // };
3508 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3509 UD->startDefinition();
3510 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3511 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3512 UD->completeDefinition();
3513 QualType KmpCmplrdataTy = C.getRecordType(UD);
3514 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3515 RD->startDefinition();
3516 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3517 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3518 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3519 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3521 if (isOpenMPTaskLoopDirective(Kind)) {
3522 QualType KmpUInt64Ty =
3523 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3524 QualType KmpInt64Ty =
3525 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3526 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3528 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3529 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3530 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3531 }
3532 RD->completeDefinition();
3533 return RD;
3534 }
3535
3536 static RecordDecl *
3537 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3538 ArrayRef<PrivateDataTy> Privates) {
3539 ASTContext &C = CGM.getContext();
3540 // Build struct kmp_task_t_with_privates {
3541 // kmp_task_t task_data;
3542 // .kmp_privates_t. privates;
3543 // };
3544 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3545 RD->startDefinition();
3546 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3547 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3548 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3549 RD->completeDefinition();
3550 return RD;
3551 }
3552
3553 /// Emit a proxy function which accepts kmp_task_t as the second
3554 /// argument.
3555 /// \code
3556 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3557 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3558 /// For taskloops:
3559 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3560 /// tt->reductions, tt->shareds);
3561 /// return 0;
3562 /// }
3563 /// \endcode
3564 static llvm::Function *
3565 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3566 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3567 QualType KmpTaskTWithPrivatesPtrQTy,
3568 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3569 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3570 llvm::Value *TaskPrivatesMap) {
3571 ASTContext &C = CGM.getContext();
3572 FunctionArgList Args;
3573 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3574 ImplicitParamDecl::Other);
3575 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3576 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3577 ImplicitParamDecl::Other);
3578 Args.push_back(&GtidArg);
3579 Args.push_back(&TaskTypeArg);
3580 const auto &TaskEntryFnInfo =
3581 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3582 llvm::FunctionType *TaskEntryTy =
3583 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3584 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3585 auto *TaskEntry = llvm::Function::Create(
3586 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3587 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3588 TaskEntry->setDoesNotRecurse();
3589 CodeGenFunction CGF(CGM);
3590 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3591 Loc, Loc);
3592
3593 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3594 // tt,
3595 // For taskloops:
3596 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3597 // tt->task_data.shareds);
3598 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3599 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3600 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3601 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3602 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3603 const auto *KmpTaskTWithPrivatesQTyRD =
3604 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3605 LValue Base =
3606 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3607 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3608 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3609 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3610 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3611
3612 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3613 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3614 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3615 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3616 CGF.ConvertTypeForMem(SharedsPtrTy));
3617
3618 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3619 llvm::Value *PrivatesParam;
3620 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3621 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3622 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3623 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3624 } else {
3625 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3626 }
3627
3628 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3629 TaskPrivatesMap,
3630 CGF.Builder
3631 .CreatePointerBitCastOrAddrSpaceCast(
3632 TDBase.getAddress(CGF), CGF.VoidPtrTy)
3633 .getPointer()};
3634 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3635 std::end(CommonArgs));
3636 if (isOpenMPTaskLoopDirective(Kind)) {
3637 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3638 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3639 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3640 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3641 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3642 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3643 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3644 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3645 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3646 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3647 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3648 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3649 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3650 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3651 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3652 CallArgs.push_back(LBParam);
3653 CallArgs.push_back(UBParam);
3654 CallArgs.push_back(StParam);
3655 CallArgs.push_back(LIParam);
3656 CallArgs.push_back(RParam);
3657 }
3658 CallArgs.push_back(SharedsParam);
3659
3660 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3661 CallArgs);
3662 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3663 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3664 CGF.FinishFunction();
3665 return TaskEntry;
3666 }
3667
3668 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3669 SourceLocation Loc,
3670 QualType KmpInt32Ty,
3671 QualType KmpTaskTWithPrivatesPtrQTy,
3672 QualType KmpTaskTWithPrivatesQTy) {
3673 ASTContext &C = CGM.getContext();
3674 FunctionArgList Args;
3675 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3676 ImplicitParamDecl::Other);
3677 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3678 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3679 ImplicitParamDecl::Other);
3680 Args.push_back(&GtidArg);
3681 Args.push_back(&TaskTypeArg);
3682 const auto &DestructorFnInfo =
3683 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3684 llvm::FunctionType *DestructorFnTy =
3685 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3686 std::string Name =
3687 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3688 auto *DestructorFn =
3689 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3690 Name, &CGM.getModule());
3691 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3692 DestructorFnInfo);
3693 DestructorFn->setDoesNotRecurse();
3694 CodeGenFunction CGF(CGM);
3695 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3696 Args, Loc, Loc);
3697
3698 LValue Base = CGF.EmitLoadOfPointerLValue(
3699 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3700 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3701 const auto *KmpTaskTWithPrivatesQTyRD =
3702 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3703 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3704 Base = CGF.EmitLValueForField(Base, *FI);
3705 for (const auto *Field :
3706 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3707 if (QualType::DestructionKind DtorKind =
3708 Field->getType().isDestructedType()) {
3709 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3710 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3711 }
3712 }
3713 CGF.FinishFunction();
3714 return DestructorFn;
3715 }
3716
3717 /// Emit a privates mapping function for correct handling of private and
3718 /// firstprivate variables.
3719 /// \code
3720 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3721 /// **noalias priv1,..., <tyn> **noalias privn) {
3722 /// *priv1 = &.privates.priv1;
3723 /// ...;
3724 /// *privn = &.privates.privn;
3725 /// }
3726 /// \endcode
3727 static llvm::Value *
3728 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3729 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3730 ArrayRef<PrivateDataTy> Privates) {
3731 ASTContext &C = CGM.getContext();
3732 FunctionArgList Args;
3733 ImplicitParamDecl TaskPrivatesArg(
3734 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3735 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3736 ImplicitParamDecl::Other);
3737 Args.push_back(&TaskPrivatesArg);
3738 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3739 unsigned Counter = 1;
3740 for (const Expr *E : Data.PrivateVars) {
3741 Args.push_back(ImplicitParamDecl::Create(
3742 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3743 C.getPointerType(C.getPointerType(E->getType()))
3744 .withConst()
3745 .withRestrict(),
3746 ImplicitParamDecl::Other));
3747 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3748 PrivateVarsPos[VD] = Counter;
3749 ++Counter;
3750 }
3751 for (const Expr *E : Data.FirstprivateVars) {
3752 Args.push_back(ImplicitParamDecl::Create(
3753 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3754 C.getPointerType(C.getPointerType(E->getType()))
3755 .withConst()
3756 .withRestrict(),
3757 ImplicitParamDecl::Other));
3758 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3759 PrivateVarsPos[VD] = Counter;
3760 ++Counter;
3761 }
3762 for (const Expr *E : Data.LastprivateVars) {
3763 Args.push_back(ImplicitParamDecl::Create(
3764 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765 C.getPointerType(C.getPointerType(E->getType()))
3766 .withConst()
3767 .withRestrict(),
3768 ImplicitParamDecl::Other));
3769 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770 PrivateVarsPos[VD] = Counter;
3771 ++Counter;
3772 }
3773 for (const VarDecl *VD : Data.PrivateLocals) {
3774 QualType Ty = VD->getType().getNonReferenceType();
3775 if (VD->getType()->isLValueReferenceType())
3776 Ty = C.getPointerType(Ty);
3777 if (isAllocatableDecl(VD))
3778 Ty = C.getPointerType(Ty);
3779 Args.push_back(ImplicitParamDecl::Create(
3780 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3782 ImplicitParamDecl::Other));
3783 PrivateVarsPos[VD] = Counter;
3784 ++Counter;
3785 }
3786 const auto &TaskPrivatesMapFnInfo =
3787 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3788 llvm::FunctionType *TaskPrivatesMapTy =
3789 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3790 std::string Name =
3791 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3792 auto *TaskPrivatesMap = llvm::Function::Create(
3793 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3794 &CGM.getModule());
3795 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3796 TaskPrivatesMapFnInfo);
3797 if (CGM.getLangOpts().Optimize) {
3798 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3799 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3800 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3801 }
3802 CodeGenFunction CGF(CGM);
3803 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3804 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3805
3806 // *privi = &.privates.privi;
3807 LValue Base = CGF.EmitLoadOfPointerLValue(
3808 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3809 TaskPrivatesArg.getType()->castAs<PointerType>());
3810 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3811 Counter = 0;
3812 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3813 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3814 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3815 LValue RefLVal =
3816 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3817 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3818 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3819 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3820 ++Counter;
3821 }
3822 CGF.FinishFunction();
3823 return TaskPrivatesMap;
3824 }
3825
3826 /// Emit initialization for private variables in task-based directives.
3827 static void emitPrivatesInit(CodeGenFunction &CGF,
3828 const OMPExecutableDirective &D,
3829 Address KmpTaskSharedsPtr, LValue TDBase,
3830 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3831 QualType SharedsTy, QualType SharedsPtrTy,
3832 const OMPTaskDataTy &Data,
3833 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3834 ASTContext &C = CGF.getContext();
3835 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3836 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3837 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3838 ? OMPD_taskloop
3839 : OMPD_task;
3840 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3841 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3842 LValue SrcBase;
3843 bool IsTargetTask =
3844 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3845 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3846 // For target-based directives, skip the 4 firstprivate arrays:
3847 // BasePointersArray, PointersArray, SizesArray, and MappersArray. The
3848 // original variables for these arrays are not captured; we get their addresses explicitly.
3849 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3850 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3851 SrcBase = CGF.MakeAddrLValue(
3852 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3853 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3854 SharedsTy);
3855 }
3856 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3857 for (const PrivateDataTy &Pair : Privates) {
3858 // Do not initialize private locals.
3859 if (Pair.second.isLocalPrivate()) {
3860 ++FI;
3861 continue;
3862 }
3863 const VarDecl *VD = Pair.second.PrivateCopy;
3864 const Expr *Init = VD->getAnyInitializer();
3865 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3866 !CGF.isTrivialInitializer(Init)))) {
3867 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3868 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3869 const VarDecl *OriginalVD = Pair.second.Original;
3870 // Check if the variable is the target-based BasePointersArray,
3871 // PointersArray, SizesArray, or MappersArray.
3872 LValue SharedRefLValue;
3873 QualType Type = PrivateLValue.getType();
3874 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3875 if (IsTargetTask && !SharedField) {
3876 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3877 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3878 cast<CapturedDecl>(OriginalVD->getDeclContext())
3879 ->getNumParams() == 0 &&
3880 isa<TranslationUnitDecl>(
3881 cast<CapturedDecl>(OriginalVD->getDeclContext())
3882 ->getDeclContext()) &&
3883 "Expected artificial target data variable.");
3884 SharedRefLValue =
3885 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3886 } else if (ForDup) {
3887 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3888 SharedRefLValue = CGF.MakeAddrLValue(
3889 Address(SharedRefLValue.getPointer(CGF),
3890 C.getDeclAlign(OriginalVD)),
3891 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3892 SharedRefLValue.getTBAAInfo());
3893 } else if (CGF.LambdaCaptureFields.count(
3894 Pair.second.Original->getCanonicalDecl()) > 0 ||
3895 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3896 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3897 } else {
3898 // Processing for implicitly captured variables.
3899 InlinedOpenMPRegionRAII Region(
3900 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3901 /*HasCancel=*/false, /*NoInheritance=*/true);
3902 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3903 }
3904 if (Type->isArrayType()) {
3905 // Initialize firstprivate array.
3906 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3907 // Perform simple memcpy.
3908 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3909 } else {
3910 // Initialize firstprivate array using element-by-element
3911 // initialization.
3912 CGF.EmitOMPAggregateAssign(
3913 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3914 Type,
3915 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3916 Address SrcElement) {
3917 // Clean up any temporaries needed by the initialization.
3918 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3919 InitScope.addPrivate(
3920 Elem, [SrcElement]() -> Address { return SrcElement; });
3921 (void)InitScope.Privatize();
3922 // Emit initialization for single element.
3923 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3924 CGF, &CapturesInfo);
3925 CGF.EmitAnyExprToMem(Init, DestElement,
3926 Init->getType().getQualifiers(),
3927 /*IsInitializer=*/false);
3928 });
3929 }
3930 } else {
3931 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3932 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3933 return SharedRefLValue.getAddress(CGF);
3934 });
3935 (void)InitScope.Privatize();
3936 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3937 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3938 /*capturedByInit=*/false);
3939 }
3940 } else {
3941 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3942 }
3943 }
3944 ++FI;
3945 }
3946 }
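// For example, with 'task firstprivate(a)' where 'a' is an array of a
// trivially-copyable type, the logic above reduces to a single aggregate
// copy (memcpy) from the captured shareds into the privates record; a
// non-trivial copy constructor instead triggers the element-by-element
// EmitOMPAggregateAssign path.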
3947
3948 /// Check if the duplication (task_dup) function is required for taskloops, i.e. whether any private copy needs non-trivial initialization.
3949 static bool checkInitIsRequired(CodeGenFunction &CGF,
3950 ArrayRef<PrivateDataTy> Privates) {
3951 bool InitRequired = false;
3952 for (const PrivateDataTy &Pair : Privates) {
3953 if (Pair.second.isLocalPrivate())
3954 continue;
3955 const VarDecl *VD = Pair.second.PrivateCopy;
3956 const Expr *Init = VD->getAnyInitializer();
3957 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3958 !CGF.isTrivialInitializer(Init));
3959 if (InitRequired)
3960 break;
3961 }
3962 return InitRequired;
3963 }
3964
3966 /// Emit task_dup function (for initialization of
3967 /// private/firstprivate/lastprivate vars and last_iter flag)
3968 /// \code
3969 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3970 /// lastpriv) {
3971 /// // setup lastprivate flag
3972 /// task_dst->last = lastpriv;
3973 /// // could be constructor calls here...
3974 /// }
3975 /// \endcode
3976 static llvm::Value *
3977 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3978 const OMPExecutableDirective &D,
3979 QualType KmpTaskTWithPrivatesPtrQTy,
3980 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3981 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3982 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3983 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3984 ASTContext &C = CGM.getContext();
3985 FunctionArgList Args;
3986 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3987 KmpTaskTWithPrivatesPtrQTy,
3988 ImplicitParamDecl::Other);
3989 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3990 KmpTaskTWithPrivatesPtrQTy,
3991 ImplicitParamDecl::Other);
3992 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3993 ImplicitParamDecl::Other);
3994 Args.push_back(&DstArg);
3995 Args.push_back(&SrcArg);
3996 Args.push_back(&LastprivArg);
3997 const auto &TaskDupFnInfo =
3998 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3999 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4000 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4001 auto *TaskDup = llvm::Function::Create(
4002 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4003 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4004 TaskDup->setDoesNotRecurse();
4005 CodeGenFunction CGF(CGM);
4006 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4007 Loc);
4008
4009 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4010 CGF.GetAddrOfLocalVar(&DstArg),
4011 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4012 // task_dst->liter = lastpriv;
4013 if (WithLastIter) {
4014 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4015 LValue Base = CGF.EmitLValueForField(
4016 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4017 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4018 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4019 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4020 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4021 }
4022
4023 // Emit initial values for private copies (if any).
4024 assert(!Privates.empty());
4025 Address KmpTaskSharedsPtr = Address::invalid();
4026 if (!Data.FirstprivateVars.empty()) {
4027 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4028 CGF.GetAddrOfLocalVar(&SrcArg),
4029 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4030 LValue Base = CGF.EmitLValueForField(
4031 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4032 KmpTaskSharedsPtr = Address(
4033 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4034 Base, *std::next(KmpTaskTQTyRD->field_begin(),
4035 KmpTaskTShareds)),
4036 Loc),
4037 CGM.getNaturalTypeAlignment(SharedsTy));
4038 }
4039 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4040 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4041 CGF.FinishFunction();
4042 return TaskDup;
4043 }
4044
4045 /// Checks if destructor function is required to be generated.
4046 /// \return true if cleanups are required, false otherwise.
4047 static bool
4048 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4049 ArrayRef<PrivateDataTy> Privates) {
4050 for (const PrivateDataTy &P : Privates) {
4051 if (P.second.isLocalPrivate())
4052 continue;
4053 QualType Ty = P.second.Original->getType().getNonReferenceType();
4054 if (Ty.isDestructedType())
4055 return true;
4056 }
4057 return false;
4058 }
4059
4060 namespace {
4061 /// Loop generator for OpenMP iterator expression.
4062 class OMPIteratorGeneratorScope final
4063 : public CodeGenFunction::OMPPrivateScope {
4064 CodeGenFunction &CGF;
4065 const OMPIteratorExpr *E = nullptr;
4066 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4067 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4068 OMPIteratorGeneratorScope() = delete;
4069 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4070
4071 public:
4072 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4073 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4074 if (!E)
4075 return;
4076 SmallVector<llvm::Value *, 4> Uppers;
4077 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4078 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4079 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4080 addPrivate(VD, [&CGF, VD]() {
4081 return CGF.CreateMemTemp(VD->getType(), VD->getName());
4082 });
4083 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4084 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4085 return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4086 "counter.addr");
4087 });
4088 }
4089 Privatize();
4090
4091 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4092 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4093 LValue CLVal =
4094 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4095 HelperData.CounterVD->getType());
4096 // Counter = 0;
4097 CGF.EmitStoreOfScalar(
4098 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4099 CLVal);
4100 CodeGenFunction::JumpDest &ContDest =
4101 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4102 CodeGenFunction::JumpDest &ExitDest =
4103 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4104 // N = <number-of-iterations>;
4105 llvm::Value *N = Uppers[I];
4106 // cont:
4107 // if (Counter < N) goto body; else goto exit;
4108 CGF.EmitBlock(ContDest.getBlock());
4109 auto *CVal =
4110 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4111 llvm::Value *Cmp =
4112 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4113 ? CGF.Builder.CreateICmpSLT(CVal, N)
4114 : CGF.Builder.CreateICmpULT(CVal, N);
4115 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4116 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4117 // body:
4118 CGF.EmitBlock(BodyBB);
4119 // Iteri = Begini + Counter * Stepi;
4120 CGF.EmitIgnoredExpr(HelperData.Update);
4121 }
4122 }
4123 ~OMPIteratorGeneratorScope() {
4124 if (!E)
4125 return;
4126 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4127 // Counter = Counter + 1;
4128 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4129 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4130 // goto cont;
4131 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4132 // exit:
4133 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4134 }
4135 }
4136 };
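// For 'iterator(it = 0:n)' the ctor/dtor pair above brackets the enclosed
// code with roughly the following control flow (illustrative sketch):
//
//   counter = 0;
// cont:
//   if (counter < n) goto body; else goto exit;
// body:
//   it = begin + counter * step;
//   <enclosed code>        // emitted between construction and destruction
//   counter = counter + 1; // emitted by the destructor
//   goto cont;
// exit: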
4137 } // namespace
4138
4139 static std::pair<llvm::Value *, llvm::Value *>
4140 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4141 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4142 llvm::Value *Addr;
4143 if (OASE) {
4144 const Expr *Base = OASE->getBase();
4145 Addr = CGF.EmitScalarExpr(Base);
4146 } else {
4147 Addr = CGF.EmitLValue(E).getPointer(CGF);
4148 }
4149 llvm::Value *SizeVal;
4150 QualType Ty = E->getType();
4151 if (OASE) {
4152 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4153 for (const Expr *SE : OASE->getDimensions()) {
4154 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4155 Sz = CGF.EmitScalarConversion(
4156 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4157 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4158 }
4159 } else if (const auto *ASE =
4160 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4161 LValue UpAddrLVal =
4162 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4163 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4164 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4165 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4166 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4167 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4168 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4169 } else {
4170 SizeVal = CGF.getTypeSize(Ty);
4171 }
4172 return std::make_pair(Addr, SizeVal);
4173 }
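// Examples of the returned (address, size) pair (illustrative):
//   ([3][4])p       -> (p,     3 * 4 * sizeof(*p))
//   a[1:2]          -> (&a[1], (char *)(&a[2] + 1) - (char *)&a[1])
//   plain scalar x  -> (&x,    sizeof(x))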
4174
4175 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
4176 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4177 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4178 if (KmpTaskAffinityInfoTy.isNull()) {
4179 RecordDecl *KmpAffinityInfoRD =
4180 C.buildImplicitRecord("kmp_task_affinity_info_t");
4181 KmpAffinityInfoRD->startDefinition();
4182 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4183 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4184 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4185 KmpAffinityInfoRD->completeDefinition();
4186 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4187 }
4188 }
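// The implicit record built above corresponds roughly to the following C
// layout (a sketch; field names are illustrative, the runtime's definition
// lives in kmp.h):
//
//   typedef struct kmp_task_affinity_info {
//     intptr_t base_addr;
//     size_t   len;
//     unsigned flags; // 32-bit unsigned
//   } kmp_task_affinity_info_t;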
4189
4190 CGOpenMPRuntime::TaskResultTy
4191 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4192 const OMPExecutableDirective &D,
4193 llvm::Function *TaskFunction, QualType SharedsTy,
4194 Address Shareds, const OMPTaskDataTy &Data) {
4195 ASTContext &C = CGM.getContext();
4196 llvm::SmallVector<PrivateDataTy, 4> Privates;
4197 // Aggregate privates and sort them by decreasing alignment.
4198 const auto *I = Data.PrivateCopies.begin();
4199 for (const Expr *E : Data.PrivateVars) {
4200 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4201 Privates.emplace_back(
4202 C.getDeclAlign(VD),
4203 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4204 /*PrivateElemInit=*/nullptr));
4205 ++I;
4206 }
4207 I = Data.FirstprivateCopies.begin();
4208 const auto *IElemInitRef = Data.FirstprivateInits.begin();
4209 for (const Expr *E : Data.FirstprivateVars) {
4210 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4211 Privates.emplace_back(
4212 C.getDeclAlign(VD),
4213 PrivateHelpersTy(
4214 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4215 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4216 ++I;
4217 ++IElemInitRef;
4218 }
4219 I = Data.LastprivateCopies.begin();
4220 for (const Expr *E : Data.LastprivateVars) {
4221 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4222 Privates.emplace_back(
4223 C.getDeclAlign(VD),
4224 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4225 /*PrivateElemInit=*/nullptr));
4226 ++I;
4227 }
4228 for (const VarDecl *VD : Data.PrivateLocals) {
4229 if (isAllocatableDecl(VD))
4230 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4231 else
4232 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4233 }
4234 llvm::stable_sort(Privates,
4235 [](const PrivateDataTy &L, const PrivateDataTy &R) {
4236 return L.first > R.first;
4237 });
4238 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4239 // Build type kmp_routine_entry_t (if not built yet).
4240 emitKmpRoutineEntryT(KmpInt32Ty);
4241 // Build type kmp_task_t (if not built yet).
4242 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4243 if (SavedKmpTaskloopTQTy.isNull()) {
4244 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4245 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4246 }
4247 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4248 } else {
4249 assert((D.getDirectiveKind() == OMPD_task ||
4250 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4251 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4252 "Expected taskloop, task or target directive");
4253 if (SavedKmpTaskTQTy.isNull()) {
4254 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4255 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4256 }
4257 KmpTaskTQTy = SavedKmpTaskTQTy;
4258 }
4259 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4260 // Build particular struct kmp_task_t for the given task.
4261 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4262 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4263 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4264 QualType KmpTaskTWithPrivatesPtrQTy =
4265 C.getPointerType(KmpTaskTWithPrivatesQTy);
4266 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4267 llvm::Type *KmpTaskTWithPrivatesPtrTy =
4268 KmpTaskTWithPrivatesTy->getPointerTo();
4269 llvm::Value *KmpTaskTWithPrivatesTySize =
4270 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4271 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4272
4273 // Emit initial values for private copies (if any).
4274 llvm::Value *TaskPrivatesMap = nullptr;
4275 llvm::Type *TaskPrivatesMapTy =
4276 std::next(TaskFunction->arg_begin(), 3)->getType();
4277 if (!Privates.empty()) {
4278 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4279 TaskPrivatesMap =
4280 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4281 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4282 TaskPrivatesMap, TaskPrivatesMapTy);
4283 } else {
4284 TaskPrivatesMap = llvm::ConstantPointerNull::get(
4285 cast<llvm::PointerType>(TaskPrivatesMapTy));
4286 }
4287 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4288 // kmp_task_t *tt);
4289 llvm::Function *TaskEntry = emitProxyTaskFunction(
4290 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4291 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4292 TaskPrivatesMap);
4293
4294 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4295 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4296 // kmp_routine_entry_t *task_entry);
4297 // Task flags. Format is taken from
4298 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4299 // description of kmp_tasking_flags struct.
4300 enum {
4301 TiedFlag = 0x1,
4302 FinalFlag = 0x2,
4303 DestructorsFlag = 0x8,
4304 PriorityFlag = 0x20,
4305 DetachableFlag = 0x40,
4306 };
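// For example, a tied task with a 'priority' clause and a private that needs
// a destructor ends up here with
//   Flags == TiedFlag | DestructorsFlag | PriorityFlag == 0x29;
// the 'final' bit is OR'ed in dynamically below.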
4307 unsigned Flags = Data.Tied ? TiedFlag : 0;
4308 bool NeedsCleanup = false;
4309 if (!Privates.empty()) {
4310 NeedsCleanup =
4311 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4312 if (NeedsCleanup)
4313 Flags = Flags | DestructorsFlag;
4314 }
4315 if (Data.Priority.getInt())
4316 Flags = Flags | PriorityFlag;
4317 if (D.hasClausesOfKind<OMPDetachClause>())
4318 Flags = Flags | DetachableFlag;
4319 llvm::Value *TaskFlags =
4320 Data.Final.getPointer()
4321 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4322 CGF.Builder.getInt32(FinalFlag),
4323 CGF.Builder.getInt32(/*C=*/0))
4324 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4325 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4326 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4327 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4328 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4329 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4330 TaskEntry, KmpRoutineEntryPtrTy)};
4331 llvm::Value *NewTask;
4332 if (D.hasClausesOfKind<OMPNowaitClause>()) {
4333 // Check if we have any device clause associated with the directive.
4334 const Expr *Device = nullptr;
4335 if (auto *C = D.getSingleClause<OMPDeviceClause>())
4336 Device = C->getDevice();
4337 // Emit the device ID if any; otherwise, use the default value.
4338 llvm::Value *DeviceID;
4339 if (Device)
4340 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4341 CGF.Int64Ty, /*isSigned=*/true);
4342 else
4343 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4344 AllocArgs.push_back(DeviceID);
4345 NewTask = CGF.EmitRuntimeCall(
4346 OMPBuilder.getOrCreateRuntimeFunction(
4347 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4348 AllocArgs);
4349 } else {
4350 NewTask =
4351 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4352 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4353 AllocArgs);
4354 }
4355 // Emit detach clause initialization.
4356 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4357 // task_descriptor);
4358 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4359 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4360 LValue EvtLVal = CGF.EmitLValue(Evt);
4361
4362 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4363 // int gtid, kmp_task_t *task);
4364 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4365 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4366 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4367 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4368 OMPBuilder.getOrCreateRuntimeFunction(
4369 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4370 {Loc, Tid, NewTask});
4371 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4372 Evt->getExprLoc());
4373 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4374 }
4375 // Process affinity clauses.
4376 if (D.hasClausesOfKind<OMPAffinityClause>()) {
4377 // Process list of affinity data.
4378 ASTContext &C = CGM.getContext();
4379 Address AffinitiesArray = Address::invalid();
4380 // Calculate number of elements to form the array of affinity data.
4381 llvm::Value *NumOfElements = nullptr;
4382 unsigned NumAffinities = 0;
4383 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4384 if (const Expr *Modifier = C->getModifier()) {
4385 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4386 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4387 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4388 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4389 NumOfElements =
4390 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4391 }
4392 } else {
4393 NumAffinities += C->varlist_size();
4394 }
4395 }
4396 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4397 // Field ids in the kmp_task_affinity_info record.
4398 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4399
4400 QualType KmpTaskAffinityInfoArrayTy;
4401 if (NumOfElements) {
4402 NumOfElements = CGF.Builder.CreateNUWAdd(
4403 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4404 OpaqueValueExpr OVE(
4405 Loc,
4406 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4407 VK_PRValue);
4408 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4409 RValue::get(NumOfElements));
4410 KmpTaskAffinityInfoArrayTy =
4411 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4412 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4413 // Properly emit variable-sized array.
4414 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4415 ImplicitParamDecl::Other);
4416 CGF.EmitVarDecl(*PD);
4417 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4418 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4419 /*isSigned=*/false);
4420 } else {
4421 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4422 KmpTaskAffinityInfoTy,
4423 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4424 ArrayType::Normal, /*IndexTypeQuals=*/0);
4425 AffinitiesArray =
4426 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4427 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4428 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4429 /*isSigned=*/false);
4430 }
4431
4432 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4433 // Fill the array with elements that have no iterator modifier.
4434 unsigned Pos = 0;
4435 bool HasIterator = false;
4436 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4437 if (C->getModifier()) {
4438 HasIterator = true;
4439 continue;
4440 }
4441 for (const Expr *E : C->varlists()) {
4442 llvm::Value *Addr;
4443 llvm::Value *Size;
4444 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4445 LValue Base =
4446 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4447 KmpTaskAffinityInfoTy);
4448 // affs[i].base_addr = &<Affinities[i].second>;
4449 LValue BaseAddrLVal = CGF.EmitLValueForField(
4450 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4451 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4452 BaseAddrLVal);
4453 // affs[i].len = sizeof(<Affinities[i].second>);
4454 LValue LenLVal = CGF.EmitLValueForField(
4455 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4456 CGF.EmitStoreOfScalar(Size, LenLVal);
4457 ++Pos;
4458 }
4459 }
4460 LValue PosLVal;
4461 if (HasIterator) {
4462 PosLVal = CGF.MakeAddrLValue(
4463 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4464 C.getSizeType());
4465 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4466 }
4467 // Process elements with iterators.
4468 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4469 const Expr *Modifier = C->getModifier();
4470 if (!Modifier)
4471 continue;
4472 OMPIteratorGeneratorScope IteratorScope(
4473 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4474 for (const Expr *E : C->varlists()) {
4475 llvm::Value *Addr;
4476 llvm::Value *Size;
4477 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4478 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4479 LValue Base = CGF.MakeAddrLValue(
4480 Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
4481 AffinitiesArray.getPointer(), Idx),
4482 AffinitiesArray.getAlignment()),
4483 KmpTaskAffinityInfoTy);
4484 // affs[i].base_addr = &<Affinities[i].second>;
4485 LValue BaseAddrLVal = CGF.EmitLValueForField(
4486 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4487 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4488 BaseAddrLVal);
4489 // affs[i].len = sizeof(<Affinities[i].second>);
4490 LValue LenLVal = CGF.EmitLValueForField(
4491 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4492 CGF.EmitStoreOfScalar(Size, LenLVal);
4493 Idx = CGF.Builder.CreateNUWAdd(
4494 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4495 CGF.EmitStoreOfScalar(Idx, PosLVal);
4496 }
4497 }
4498 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4499 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4500 // naffins, kmp_task_affinity_info_t *affin_list);
4501 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4502 llvm::Value *GTid = getThreadID(CGF, Loc);
4503 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4504 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4505 // FIXME: Emit the function and ignore its result for now unless the
4506 // runtime function is properly implemented.
4507 (void)CGF.EmitRuntimeCall(
4508 OMPBuilder.getOrCreateRuntimeFunction(
4509 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4510 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4511 }
4512 llvm::Value *NewTaskNewTaskTTy =
4513 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4514 NewTask, KmpTaskTWithPrivatesPtrTy);
4515 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4516 KmpTaskTWithPrivatesQTy);
4517 LValue TDBase =
4518 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4519 // Fill the data in the resulting kmp_task_t record.
4520 // Copy shareds if there are any.
4521 Address KmpTaskSharedsPtr = Address::invalid();
4522 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4523 KmpTaskSharedsPtr =
4524 Address(CGF.EmitLoadOfScalar(
4525 CGF.EmitLValueForField(
4526 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4527 KmpTaskTShareds)),
4528 Loc),
4529 CGM.getNaturalTypeAlignment(SharedsTy));
4530 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4531 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4532 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4533 }
4534 // Emit initial values for private copies (if any).
4535 TaskResultTy Result;
4536 if (!Privates.empty()) {
4537 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4538 SharedsTy, SharedsPtrTy, Data, Privates,
4539 /*ForDup=*/false);
4540 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4541 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4542 Result.TaskDupFn = emitTaskDupFunction(
4543 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4544 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4545 /*WithLastIter=*/!Data.LastprivateVars.empty());
4546 }
4547 }
4548 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4549 enum { Priority = 0, Destructors = 1 };
4550 // Provide pointer to function with destructors for privates.
4551 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4552 const RecordDecl *KmpCmplrdataUD =
4553 (*FI)->getType()->getAsUnionType()->getDecl();
4554 if (NeedsCleanup) {
4555 llvm::Value *DestructorFn = emitDestructorsFunction(
4556 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4557 KmpTaskTWithPrivatesQTy);
4558 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4559 LValue DestructorsLV = CGF.EmitLValueForField(
4560 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4561 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4562 DestructorFn, KmpRoutineEntryPtrTy),
4563 DestructorsLV);
4564 }
4565 // Set priority.
4566 if (Data.Priority.getInt()) {
4567 LValue Data2LV = CGF.EmitLValueForField(
4568 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4569 LValue PriorityLV = CGF.EmitLValueForField(
4570 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4571 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4572 }
4573 Result.NewTask = NewTask;
4574 Result.TaskEntry = TaskEntry;
4575 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4576 Result.TDBase = TDBase;
4577 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4578 return Result;
4579 }
4580
4581 namespace {
4582 /// Dependence kind for RTL.
4583 enum RTLDependenceKindTy {
4584 DepIn = 0x01,
4585 DepInOut = 0x3,
4586 DepMutexInOutSet = 0x4
4587 };
4588 /// Field ids in the kmp_depend_info record.
4589 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4590 } // namespace
4591
4592 /// Translates internal dependency kind into the runtime kind.
4593 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4594 RTLDependenceKindTy DepKind;
4595 switch (K) {
4596 case OMPC_DEPEND_in:
4597 DepKind = DepIn;
4598 break;
4599 // Out and InOut dependencies must use the same code.
4600 case OMPC_DEPEND_out:
4601 case OMPC_DEPEND_inout:
4602 DepKind = DepInOut;
4603 break;
4604 case OMPC_DEPEND_mutexinoutset:
4605 DepKind = DepMutexInOutSet;
4606 break;
4607 case OMPC_DEPEND_source:
4608 case OMPC_DEPEND_sink:
4609 case OMPC_DEPEND_depobj:
4610 case OMPC_DEPEND_unknown:
4611 llvm_unreachable("Unknown task dependence type");
4612 }
4613 return DepKind;
4614 }
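// E.g. 'depend(in: x)' lowers to 0x1, while both 'depend(out: x)' and
// 'depend(inout: x)' lower to 0x3, matching the runtime's flag encoding.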
4615
4616 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4617 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4618 QualType &FlagsTy) {
4619 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4620 if (KmpDependInfoTy.isNull()) {
4621 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4622 KmpDependInfoRD->startDefinition();
4623 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4624 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4625 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4626 KmpDependInfoRD->completeDefinition();
4627 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4628 }
4629 }
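// The record built above corresponds roughly to the runtime's dependency
// descriptor (a sketch; see kmp.h, where 'flags' is actually a bitfield
// struct of the same width):
//
//   typedef struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t   len;
//     kmp_uint8 flags; // bool-width unsigned integer here
//   } kmp_depend_info_t;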
4630
4631 std::pair<llvm::Value *, LValue>
4632 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4633 SourceLocation Loc) {
4634 ASTContext &C = CGM.getContext();
4635 QualType FlagsTy;
4636 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4637 RecordDecl *KmpDependInfoRD =
4638 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4639 LValue Base = CGF.EmitLoadOfPointerLValue(
4640 DepobjLVal.getAddress(CGF),
4641 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4642 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4643 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4644 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4645 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4646 Base.getTBAAInfo());
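// Step back one element from the user-visible depobj pointer: the element
// at index -1 is the header written by emitDepobjDependClause, which stores
// the number of dependency entries in its base_addr field.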
4647 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4648 Addr.getElementType(), Addr.getPointer(),
4649 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4650 LValue NumDepsBase = CGF.MakeAddrLValue(
4651 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4652 Base.getBaseInfo(), Base.getTBAAInfo());
4653 // NumDeps = deps[i].base_addr;
4654 LValue BaseAddrLVal = CGF.EmitLValueForField(
4655 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4656 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4657 return std::make_pair(NumDeps, Base);
4658 }
4659
4660 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4661 llvm::PointerUnion<unsigned *, LValue *> Pos,
4662 const OMPTaskDataTy::DependData &Data,
4663 Address DependenciesArray) {
4664 CodeGenModule &CGM = CGF.CGM;
4665 ASTContext &C = CGM.getContext();
4666 QualType FlagsTy;
4667 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4668 RecordDecl *KmpDependInfoRD =
4669 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4670 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4671
4672 OMPIteratorGeneratorScope IteratorScope(
4673 CGF, cast_or_null<OMPIteratorExpr>(
4674 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4675 : nullptr));
4676 for (const Expr *E : Data.DepExprs) {
4677 llvm::Value *Addr;
4678 llvm::Value *Size;
4679 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4680 LValue Base;
4681 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4682 Base = CGF.MakeAddrLValue(
4683 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4684 } else {
4685 LValue &PosLVal = *Pos.get<LValue *>();
4686 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4687 Base = CGF.MakeAddrLValue(
4688 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4689 DependenciesArray.getPointer(), Idx),
4690 DependenciesArray.getAlignment()),
4691 KmpDependInfoTy);
4692 }
4693 // deps[i].base_addr = &<Dependencies[i].second>;
4694 LValue BaseAddrLVal = CGF.EmitLValueForField(
4695 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4696 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4697 BaseAddrLVal);
4698 // deps[i].len = sizeof(<Dependencies[i].second>);
4699 LValue LenLVal = CGF.EmitLValueForField(
4700 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4701 CGF.EmitStoreOfScalar(Size, LenLVal);
4702 // deps[i].flags = <Dependencies[i].first>;
4703 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4704 LValue FlagsLVal = CGF.EmitLValueForField(
4705 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4706 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4707 FlagsLVal);
4708 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4709 ++(*P);
4710 } else {
4711 LValue &PosLVal = *Pos.get<LValue *>();
4712 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4713 Idx = CGF.Builder.CreateNUWAdd(Idx,
4714 llvm::ConstantInt::get(Idx->getType(), 1));
4715 CGF.EmitStoreOfScalar(Idx, PosLVal);
4716 }
4717 }
4718 }
4719
4720 static SmallVector<llvm::Value *, 4>
4721 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4722 const OMPTaskDataTy::DependData &Data) {
4723 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4724 "Expected depobj dependecy kind.");
4725 SmallVector<llvm::Value *, 4> Sizes;
4726 SmallVector<LValue, 4> SizeLVals;
4727 ASTContext &C = CGF.getContext();
4728 QualType FlagsTy;
4729 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4730 RecordDecl *KmpDependInfoRD =
4731 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4732 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4733 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4734 {
4735 OMPIteratorGeneratorScope IteratorScope(
4736 CGF, cast_or_null<OMPIteratorExpr>(
4737 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4738 : nullptr));
4739 for (const Expr *E : Data.DepExprs) {
4740 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4741 LValue Base = CGF.EmitLoadOfPointerLValue(
4742 DepobjLVal.getAddress(CGF),
4743 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4744 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4745 Base.getAddress(CGF), KmpDependInfoPtrT);
4746 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4747 Base.getTBAAInfo());
4748 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4749 Addr.getElementType(), Addr.getPointer(),
4750 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4751 LValue NumDepsBase = CGF.MakeAddrLValue(
4752 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4753 Base.getBaseInfo(), Base.getTBAAInfo());
4754 // NumDeps = deps[i].base_addr;
4755 LValue BaseAddrLVal = CGF.EmitLValueForField(
4756 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4757 llvm::Value *NumDeps =
4758 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4759 LValue NumLVal = CGF.MakeAddrLValue(
4760 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4761 C.getUIntPtrType());
4762 CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4763 llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4764 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4765 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4766 CGF.EmitStoreOfScalar(Add, NumLVal);
4767 SizeLVals.push_back(NumLVal);
4768 }
4769 }
4770 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4771 llvm::Value *Size =
4772 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4773 Sizes.push_back(Size);
4774 }
4775 return Sizes;
4776 }
4777
4778 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4779 LValue PosLVal,
4780 const OMPTaskDataTy::DependData &Data,
4781 Address DependenciesArray) {
4782 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4783 "Expected depobj dependecy kind.");
4784 ASTContext &C = CGF.getContext();
4785 QualType FlagsTy;
4786 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4787 RecordDecl *KmpDependInfoRD =
4788 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4789 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4790 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4791 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4792 {
4793 OMPIteratorGeneratorScope IteratorScope(
4794 CGF, cast_or_null<OMPIteratorExpr>(
4795 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4796 : nullptr));
4797 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4798 const Expr *E = Data.DepExprs[I];
4799 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4800 LValue Base = CGF.EmitLoadOfPointerLValue(
4801 DepobjLVal.getAddress(CGF),
4802 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4803 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4804 Base.getAddress(CGF), KmpDependInfoPtrT);
4805 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4806 Base.getTBAAInfo());
4807
4808 // Get number of elements in a single depobj.
4809 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4810 Addr.getElementType(), Addr.getPointer(),
4811 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4812 LValue NumDepsBase = CGF.MakeAddrLValue(
4813 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4814 Base.getBaseInfo(), Base.getTBAAInfo());
4815 // NumDeps = deps[i].base_addr;
4816 LValue BaseAddrLVal = CGF.EmitLValueForField(
4817 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4818 llvm::Value *NumDeps =
4819 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4820
4821 // memcpy the dependency data.
4822 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4823 ElSize,
4824 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4825 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4826 Address DepAddr =
4827 Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4828 DependenciesArray.getPointer(), Pos),
4829 DependenciesArray.getAlignment());
4830 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4831
4832 // Advance pos by the number of copied entries.
4833 // pos += numDeps;
4834 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4835 CGF.EmitStoreOfScalar(Add, PosLVal);
4836 }
4837 }
4838 }
4839
4840 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4841 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4842 SourceLocation Loc) {
4843 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4844 return D.DepExprs.empty();
4845 }))
4846 return std::make_pair(nullptr, Address::invalid());
4847 // Process list of dependencies.
4848 ASTContext &C = CGM.getContext();
4849 Address DependenciesArray = Address::invalid();
4850 llvm::Value *NumOfElements = nullptr;
4851 unsigned NumDependencies = std::accumulate(
4852 Dependencies.begin(), Dependencies.end(), 0,
4853 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4854 return D.DepKind == OMPC_DEPEND_depobj
4855 ? V
4856 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4857 });
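// E.g. 'depend(in: a, b) depend(out: c)' with no iterator modifiers yields
// NumDependencies == 3; depobj and iterator-modified clauses instead
// contribute dynamically computed counts below.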
4858 QualType FlagsTy;
4859 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4860 bool HasDepobjDeps = false;
4861 bool HasRegularWithIterators = false;
4862 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4863 llvm::Value *NumOfRegularWithIterators =
4864 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4865 // Calculate the number of depobj dependencies and regular deps with iterators.
4866 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4867 if (D.DepKind == OMPC_DEPEND_depobj) {
4868 SmallVector<llvm::Value *, 4> Sizes =
4869 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4870 for (llvm::Value *Size : Sizes) {
4871 NumOfDepobjElements =
4872 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4873 }
4874 HasDepobjDeps = true;
4875 continue;
4876 }
4877 // Include number of iterations, if any.
4878 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4879 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4880 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4881 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4882 NumOfRegularWithIterators =
4883 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4884 }
4885 HasRegularWithIterators = true;
4886 continue;
4887 }
4888 }
4889
4890 QualType KmpDependInfoArrayTy;
4891 if (HasDepobjDeps || HasRegularWithIterators) {
4892 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4893 /*isSigned=*/false);
4894 if (HasDepobjDeps) {
4895 NumOfElements =
4896 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4897 }
4898 if (HasRegularWithIterators) {
4899 NumOfElements =
4900 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4901 }
4902 OpaqueValueExpr OVE(Loc,
4903 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4904 VK_PRValue);
4905 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4906 RValue::get(NumOfElements));
4907 KmpDependInfoArrayTy =
4908 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4909 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4910 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4911 // Properly emit variable-sized array.
4912 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4913 ImplicitParamDecl::Other);
4914 CGF.EmitVarDecl(*PD);
4915 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4916 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4917 /*isSigned=*/false);
4918 } else {
4919 KmpDependInfoArrayTy = C.getConstantArrayType(
4920 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4921 ArrayType::Normal, /*IndexTypeQuals=*/0);
4922 DependenciesArray =
4923 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4924 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4925 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4926 /*isSigned=*/false);
4927 }
4928 unsigned Pos = 0;
4929 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4930 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4931 Dependencies[I].IteratorExpr)
4932 continue;
4933 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4934 DependenciesArray);
4935 }
4936 // Copy regular dependencies with iterators.
4937 LValue PosLVal = CGF.MakeAddrLValue(
4938 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4939 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4940 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4942 !Dependencies[I].IteratorExpr)
4943 continue;
4944 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4945 DependenciesArray);
4946 }
4947 // Copy final depobj arrays without iterators.
4948 if (HasDepobjDeps) {
4949 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4950 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4951 continue;
4952 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4953 DependenciesArray);
4954 }
4955 }
4956 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4957 DependenciesArray, CGF.VoidPtrTy);
4958 return std::make_pair(NumOfElements, DependenciesArray);
4959 }
4960
4961 Address CGOpenMPRuntime::emitDepobjDependClause(
4962 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4963 SourceLocation Loc) {
4964 if (Dependencies.DepExprs.empty())
4965 return Address::invalid();
4966 // Process list of dependencies.
4967 ASTContext &C = CGM.getContext();
4968 Address DependenciesArray = Address::invalid();
4969 unsigned NumDependencies = Dependencies.DepExprs.size();
4970 QualType FlagsTy;
4971 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4972 RecordDecl *KmpDependInfoRD =
4973 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4974
4975 llvm::Value *Size;
4976 // Define type kmp_depend_info[<Dependencies.size()>];
4977 // For depobj reserve one extra element to store the number of elements.
4978 // It is required to handle depobj(x) update(in) construct.
4979 // kmp_depend_info[<Dependencies.size()>] deps;
4980 llvm::Value *NumDepsVal;
4981 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4982 if (const auto *IE =
4983 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4984 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4985 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4986 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4987 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4988 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4989 }
4990 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4991 NumDepsVal);
4992 CharUnits SizeInBytes =
4993 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4994 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4995 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4996 NumDepsVal =
4997 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4998 } else {
4999 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5000 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5001 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5002 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5003 Size = CGM.getSize(Sz.alignTo(Align));
5004 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5005 }
5006 // Need to allocate in dynamic memory.
5007 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5008 // Use default allocator.
5009 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5010 llvm::Value *Args[] = {ThreadID, Size, Allocator};
5011
5012 llvm::Value *Addr =
5013 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5014 CGM.getModule(), OMPRTL___kmpc_alloc),
5015 Args, ".dep.arr.addr");
5016 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5017 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5018 DependenciesArray = Address(Addr, Align);
5019 // Write the number of elements into the first element of the array for depobj.
5020 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5021 // deps[i].base_addr = NumDependencies;
5022 LValue BaseAddrLVal = CGF.EmitLValueForField(
5023 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5024 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5025 llvm::PointerUnion<unsigned *, LValue *> Pos;
5026 unsigned Idx = 1;
5027 LValue PosLVal;
5028 if (Dependencies.IteratorExpr) {
5029 PosLVal = CGF.MakeAddrLValue(
5030 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5031 C.getSizeType());
5032 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5033 /*IsInit=*/true);
5034 Pos = &PosLVal;
5035 } else {
5036 Pos = &Idx;
5037 }
5038 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5039 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5040 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5041 return DependenciesArray;
5042 }
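// Resulting depobj storage layout (illustrative):
//
//   deps[0].base_addr = <number of entries> // header element
//   deps[1 .. N]      = kmp_depend_info entries filled by emitDependData
//
// The returned address points at deps[1], i.e. one element past the header,
// which is why getDepobjElements() and emitDestroyClause() index back by -1.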
5043
5044 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5045 SourceLocation Loc) {
5046 ASTContext &C = CGM.getContext();
5047 QualType FlagsTy;
5048 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5049 LValue Base = CGF.EmitLoadOfPointerLValue(
5050 DepobjLVal.getAddress(CGF),
5051 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5052 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5053 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5054 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5055 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5056 Addr.getElementType(), Addr.getPointer(),
5057 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5058 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5059 CGF.VoidPtrTy);
5060 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5061 // Use default allocator.
5062 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5063 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5064
5065 // __kmpc_free(gtid, addr, nullptr);
5066 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5067 CGM.getModule(), OMPRTL___kmpc_free),
5068 Args);
5069 }
5070
5071 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5072 OpenMPDependClauseKind NewDepKind,
5073 SourceLocation Loc) {
5074 ASTContext &C = CGM.getContext();
5075 QualType FlagsTy;
5076 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5077 RecordDecl *KmpDependInfoRD =
5078 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5079 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5080 llvm::Value *NumDeps;
5081 LValue Base;
5082 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5083
5084 Address Begin = Base.getAddress(CGF);
5085 // Cast from pointer to array type to pointer to single element.
5086 llvm::Value *End = CGF.Builder.CreateGEP(
5087 Begin.getElementType(), Begin.getPointer(), NumDeps);
5088 // The basic structure here is a while-do loop.
5089 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5090 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5091 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5092 CGF.EmitBlock(BodyBB);
5093 llvm::PHINode *ElementPHI =
5094 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5095 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5096 Begin = Address(ElementPHI, Begin.getAlignment());
5097 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5098 Base.getTBAAInfo());
5099 // deps[i].flags = NewDepKind;
5100 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5101 LValue FlagsLVal = CGF.EmitLValueForField(
5102 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5103 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5104 FlagsLVal);
5105
5106 // Shift the address forward by one element.
5107 Address ElementNext =
5108 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5109 ElementPHI->addIncoming(ElementNext.getPointer(),
5110 CGF.Builder.GetInsertBlock());
5111 llvm::Value *IsEmpty =
5112 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5113 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5114 // Done.
5115 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5116 }
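// E.g. '#pragma omp depobj(o) update(mutexinoutset)' walks every entry of
// the depobj array built above and rewrites its flags field to 0x4 in place.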
5117
5118 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5119 const OMPExecutableDirective &D,
5120 llvm::Function *TaskFunction,
5121 QualType SharedsTy, Address Shareds,
5122 const Expr *IfCond,
5123 const OMPTaskDataTy &Data) {
5124 if (!CGF.HaveInsertPoint())
5125 return;
5126
5127 TaskResultTy Result =
5128 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5129 llvm::Value *NewTask = Result.NewTask;
5130 llvm::Function *TaskEntry = Result.TaskEntry;
5131 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5132 LValue TDBase = Result.TDBase;
5133 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5134 // Process list of dependences.
5135 Address DependenciesArray = Address::invalid();
5136 llvm::Value *NumOfElements;
5137 std::tie(NumOfElements, DependenciesArray) =
5138 emitDependClause(CGF, Data.Dependences, Loc);
5139
5140 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5141 // libcall.
5142 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5143 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5144 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
5145 // dependence list is not empty.
5146 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5147 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5148 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5149 llvm::Value *DepTaskArgs[7];
5150 if (!Data.Dependences.empty()) {
5151 DepTaskArgs[0] = UpLoc;
5152 DepTaskArgs[1] = ThreadID;
5153 DepTaskArgs[2] = NewTask;
5154 DepTaskArgs[3] = NumOfElements;
5155 DepTaskArgs[4] = DependenciesArray.getPointer();
5156 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5157 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5158 }
5159 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5160 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5161 if (!Data.Tied) {
5162 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5163 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5164 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5165 }
5166 if (!Data.Dependences.empty()) {
5167 CGF.EmitRuntimeCall(
5168 OMPBuilder.getOrCreateRuntimeFunction(
5169 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5170 DepTaskArgs);
5171 } else {
5172 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5173 CGM.getModule(), OMPRTL___kmpc_omp_task),
5174 TaskArgs);
5175 }
5176 // Check if the parent region is untied and build a return for the untied task.
5177 if (auto *Region =
5178 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5179 Region->emitUntiedSwitch(CGF);
5180 };
5181
5182 llvm::Value *DepWaitTaskArgs[6];
5183 if (!Data.Dependences.empty()) {
5184 DepWaitTaskArgs[0] = UpLoc;
5185 DepWaitTaskArgs[1] = ThreadID;
5186 DepWaitTaskArgs[2] = NumOfElements;
5187 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5188 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5189 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5190 }
5191 auto &M = CGM.getModule();
5192 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5193 TaskEntry, &Data, &DepWaitTaskArgs,
5194 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5195 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5196 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5197 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5198 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if dependence info
5199 // is specified.
5200 if (!Data.Dependences.empty())
5201 CGF.EmitRuntimeCall(
5202 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5203 DepWaitTaskArgs);
5204 // Call proxy_task_entry(gtid, new_task);
5205 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5206 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5207 Action.Enter(CGF);
5208 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5209 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5210 OutlinedFnArgs);
5211 };
5212
5213 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5214 // kmp_task_t *new_task);
5215 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5216 // kmp_task_t *new_task);
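// Taken together, the undeferred ('if' clause false) path expands roughly to
// (an illustrative sketch; the wait_deps call is emitted only when the
// directive has dependences):
//   __kmpc_omp_wait_deps(loc, gtid, ndeps, dep_list, 0, nullptr);
//   __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//   proxy_task_entry(gtid, new_task);
//   __kmpc_omp_task_complete_if0(loc, gtid, new_task);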
5217 RegionCodeGenTy RCG(CodeGen);
5218 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5219 M, OMPRTL___kmpc_omp_task_begin_if0),
5220 TaskArgs,
5221 OMPBuilder.getOrCreateRuntimeFunction(
5222 M, OMPRTL___kmpc_omp_task_complete_if0),
5223 TaskArgs);
5224 RCG.setAction(Action);
5225 RCG(CGF);
5226 };
5227
5228 if (IfCond) {
5229 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5230 } else {
5231 RegionCodeGenTy ThenRCG(ThenCodeGen);
5232 ThenRCG(CGF);
5233 }
5234 }
5235
5236 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5237 const OMPLoopDirective &D,
5238 llvm::Function *TaskFunction,
5239 QualType SharedsTy, Address Shareds,
5240 const Expr *IfCond,
5241 const OMPTaskDataTy &Data) {
5242 if (!CGF.HaveInsertPoint())
5243 return;
5244 TaskResultTy Result =
5245 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5246 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5247 // libcall.
5248 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5249 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5250 // sched, kmp_uint64 grainsize, void *task_dup);
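// For reference, the 'sched' argument encodes the schedule clause using the
// enum defined below (NoSchedule/Grainsize/NumTasks), and 'grainsize' then
// carries the clause value, or 0 when no clause is given, e.g. (illustrative):
//   #pragma omp taskloop num_tasks(4)  ->  sched = 2, grainsize = 4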
5251 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5252 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5253 llvm::Value *IfVal;
5254 if (IfCond) {
5255 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5256 /*isSigned=*/true);
5257 } else {
5258 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5259 }
5260
5261 LValue LBLVal = CGF.EmitLValueForField(
5262 Result.TDBase,
5263 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5264 const auto *LBVar =
5265 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5266 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5267 LBLVal.getQuals(),
5268 /*IsInitializer=*/true);
5269 LValue UBLVal = CGF.EmitLValueForField(
5270 Result.TDBase,
5271 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5272 const auto *UBVar =
5273 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5274 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5275 UBLVal.getQuals(),
5276 /*IsInitializer=*/true);
5277 LValue StLVal = CGF.EmitLValueForField(
5278 Result.TDBase,
5279 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5280 const auto *StVar =
5281 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5282 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5283 StLVal.getQuals(),
5284 /*IsInitializer=*/true);
5285 // Store reductions address.
5286 LValue RedLVal = CGF.EmitLValueForField(
5287 Result.TDBase,
5288 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5289 if (Data.Reductions) {
5290 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5291 } else {
5292 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5293 CGF.getContext().VoidPtrTy);
5294 }
5295 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5296 llvm::Value *TaskArgs[] = {
5297 UpLoc,
5298 ThreadID,
5299 Result.NewTask,
5300 IfVal,
5301 LBLVal.getPointer(CGF),
5302 UBLVal.getPointer(CGF),
5303 CGF.EmitLoadOfScalar(StLVal, Loc),
5304 llvm::ConstantInt::getSigned(
5305 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
5306 llvm::ConstantInt::getSigned(
5307 CGF.IntTy, Data.Schedule.getPointer()
5308 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5309 : NoSchedule),
5310 Data.Schedule.getPointer()
5311 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5312 /*isSigned=*/false)
5313 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5314 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5315 Result.TaskDupFn, CGF.VoidPtrTy)
5316 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5317 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5318 CGM.getModule(), OMPRTL___kmpc_taskloop),
5319 TaskArgs);
5320 }
5321
5322 /// Emit the reduction operation for each element of the array (required for
5323 /// array sections): LHS op = RHS.
5324 /// \param Type Type of the array.
5325 /// \param LHSVar Variable on the left side of the reduction operation
5326 /// (references an element of the array in the original variable).
5327 /// \param RHSVar Variable on the right side of the reduction operation
5328 /// (references an element of the array in the original variable).
5329 /// \param RedOpGen Generator of the reduction operation that uses LHSVar and
5330 /// RHSVar.
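/// Conceptually this emits (an illustrative sketch, not the produced IR):
/// \code
/// for (size_t I = 0; I != NumElements; ++I)
///   LHS[I] = RedOp(LHS[I], RHS[I]);
/// \endcode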
5331 static void EmitOMPAggregateReduction(
5332 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5333 const VarDecl *RHSVar,
5334 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5335 const Expr *, const Expr *)> &RedOpGen,
5336 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5337 const Expr *UpExpr = nullptr) {
5338 // Perform element-by-element initialization.
5339 QualType ElementTy;
5340 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5341 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5342
5343 // Drill down to the base element type on both arrays.
5344 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5345 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5346
5347 llvm::Value *RHSBegin = RHSAddr.getPointer();
5348 llvm::Value *LHSBegin = LHSAddr.getPointer();
5349 // Cast from pointer to array type to pointer to single element.
5350 llvm::Value *LHSEnd =
5351 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5352 // The basic structure here is a do-while loop, guarded by an initial
emptiness check.
5353 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5354 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5355 llvm::Value *IsEmpty =
5356 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5357 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5358
5359 // Enter the loop body, making that address the current address.
5360 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5361 CGF.EmitBlock(BodyBB);
5362
5363 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5364
5365 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5366 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5367 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5368 Address RHSElementCurrent =
5369 Address(RHSElementPHI,
5370 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5371
5372 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5373 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5374 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5375 Address LHSElementCurrent =
5376 Address(LHSElementPHI,
5377 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5378
5379 // Emit copy.
5380 CodeGenFunction::OMPPrivateScope Scope(CGF);
5381 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5382 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5383 Scope.Privatize();
5384 RedOpGen(CGF, XExpr, EExpr, UpExpr);
5385 Scope.ForceCleanup();
5386
5387 // Shift the address forward by one element.
5388 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5389 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5390 "omp.arraycpy.dest.element");
5391 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5392 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5393 "omp.arraycpy.src.element");
5394 // Check whether we've reached the end.
5395 llvm::Value *Done =
5396 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5397 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5398 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5399 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5400
5401 // Done.
5402 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5403 }
5404
5405 /// Emit the reduction combiner. If the combiner is a simple expression, emit
5406 /// it as is; otherwise treat it as the combiner of a UDR decl and emit it as a
5407 /// call to the UDR combiner function.
5408 static void emitReductionCombiner(CodeGenFunction &CGF,
5409 const Expr *ReductionOp) {
5410 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5411 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5412 if (const auto *DRE =
5413 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5414 if (const auto *DRD =
5415 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5416 std::pair<llvm::Function *, llvm::Function *> Reduction =
5417 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5418 RValue Func = RValue::get(Reduction.first);
5419 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5420 CGF.EmitIgnoredExpr(ReductionOp);
5421 return;
5422 }
5423 CGF.EmitIgnoredExpr(ReductionOp);
5424 }
5425
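/// Emits the reduction function that is passed to __kmpc_reduce{_nowait},
/// roughly of the form (an illustrative sketch):
/// \code
/// void .omp.reduction.reduction_func(void *lhs[<n>], void *rhs[<n>]) {
///   *(Type0 *)lhs[0] = RedOp0(*(Type0 *)lhs[0], *(Type0 *)rhs[0]);
///   ...
/// }
/// \endcode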
5426 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5427 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5428 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5429 ArrayRef<const Expr *> ReductionOps) {
5430 ASTContext &C = CGM.getContext();
5431
5432 // void reduction_func(void *LHSArg, void *RHSArg);
5433 FunctionArgList Args;
5434 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5435 ImplicitParamDecl::Other);
5436 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5437 ImplicitParamDecl::Other);
5438 Args.push_back(&LHSArg);
5439 Args.push_back(&RHSArg);
5440 const auto &CGFI =
5441 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5442 std::string Name = getName({"omp", "reduction", "reduction_func"});
5443 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5444 llvm::GlobalValue::InternalLinkage, Name,
5445 &CGM.getModule());
5446 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5447 Fn->setDoesNotRecurse();
5448 CodeGenFunction CGF(CGM);
5449 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5450
5451 // Dst = (void*[n])(LHSArg);
5452 // Src = (void*[n])(RHSArg);
5453 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5454 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5455 ArgsType), CGF.getPointerAlign());
5456 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5457 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5458 ArgsType), CGF.getPointerAlign());
5459
5460 // ...
5461 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5462 // ...
5463 CodeGenFunction::OMPPrivateScope Scope(CGF);
5464 auto IPriv = Privates.begin();
5465 unsigned Idx = 0;
5466 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5467 const auto *RHSVar =
5468 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5469 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5470 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5471 });
5472 const auto *LHSVar =
5473 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5474 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5475 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5476 });
5477 QualType PrivTy = (*IPriv)->getType();
5478 if (PrivTy->isVariablyModifiedType()) {
5479 // Get array size and emit VLA type.
5480 ++Idx;
5481 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5482 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5483 const VariableArrayType *VLA =
5484 CGF.getContext().getAsVariableArrayType(PrivTy);
5485 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5486 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5487 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5488 CGF.EmitVariablyModifiedType(PrivTy);
5489 }
5490 }
5491 Scope.Privatize();
5492 IPriv = Privates.begin();
5493 auto ILHS = LHSExprs.begin();
5494 auto IRHS = RHSExprs.begin();
5495 for (const Expr *E : ReductionOps) {
5496 if ((*IPriv)->getType()->isArrayType()) {
5497 // Emit reduction for array section.
5498 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5499 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5500 EmitOMPAggregateReduction(
5501 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5502 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5503 emitReductionCombiner(CGF, E);
5504 });
5505 } else {
5506 // Emit reduction for array subscript or single variable.
5507 emitReductionCombiner(CGF, E);
5508 }
5509 ++IPriv;
5510 ++ILHS;
5511 ++IRHS;
5512 }
5513 Scope.ForceCleanup();
5514 CGF.FinishFunction();
5515 return Fn;
5516 }
5517
5518 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5519 const Expr *ReductionOp,
5520 const Expr *PrivateRef,
5521 const DeclRefExpr *LHS,
5522 const DeclRefExpr *RHS) {
5523 if (PrivateRef->getType()->isArrayType()) {
5524 // Emit reduction for array section.
5525 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5526 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5527 EmitOMPAggregateReduction(
5528 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5529 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5530 emitReductionCombiner(CGF, ReductionOp);
5531 });
5532 } else {
5533 // Emit reduction for array subscript or single variable.
5534 emitReductionCombiner(CGF, ReductionOp);
5535 }
5536 }
5537
5538 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5539 ArrayRef<const Expr *> Privates,
5540 ArrayRef<const Expr *> LHSExprs,
5541 ArrayRef<const Expr *> RHSExprs,
5542 ArrayRef<const Expr *> ReductionOps,
5543 ReductionOptionsTy Options) {
5544 if (!CGF.HaveInsertPoint())
5545 return;
5546
5547 bool WithNowait = Options.WithNowait;
5548 bool SimpleReduction = Options.SimpleReduction;
5549
5550 // The following code should be emitted for the reduction:
5551 //
5552 // static kmp_critical_name lock = { 0 };
5553 //
5554 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5555 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5556 // ...
5557 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5558 // *(Type<n>-1*)rhs[<n>-1]);
5559 // }
5560 //
5561 // ...
5562 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5563 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5564 // RedList, reduce_func, &<lock>)) {
5565 // case 1:
5566 // ...
5567 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5568 // ...
5569 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5570 // break;
5571 // case 2:
5572 // ...
5573 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5574 // ...
5575 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5576 // break;
5577 // default:;
5578 // }
5579 //
5580 // If SimpleReduction is true, only the following code is generated:
5581 // ...
5582 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5583 // ...
5584
5585 ASTContext &C = CGM.getContext();
5586
5587 if (SimpleReduction) {
5588 CodeGenFunction::RunCleanupsScope Scope(CGF);
5589 auto IPriv = Privates.begin();
5590 auto ILHS = LHSExprs.begin();
5591 auto IRHS = RHSExprs.begin();
5592 for (const Expr *E : ReductionOps) {
5593 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5594 cast<DeclRefExpr>(*IRHS));
5595 ++IPriv;
5596 ++ILHS;
5597 ++IRHS;
5598 }
5599 return;
5600 }
5601
5602 // 1. Build a list of reduction variables.
5603 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
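// When a private item is a VLA, an extra slot follows its pointer and holds
// the dynamic element count, e.g. (illustrative):
//   void *RedList[] = {&x, &vla[0], (void *)vla_num_elements};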
5604 auto Size = RHSExprs.size();
5605 for (const Expr *E : Privates) {
5606 if (E->getType()->isVariablyModifiedType())
5607 // Reserve a slot for the array size.
5608 ++Size;
5609 }
5610 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5611 QualType ReductionArrayTy =
5612 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5613 /*IndexTypeQuals=*/0);
5614 Address ReductionList =
5615 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5616 auto IPriv = Privates.begin();
5617 unsigned Idx = 0;
5618 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5619 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5620 CGF.Builder.CreateStore(
5621 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5622 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5623 Elem);
5624 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5625 // Store array size.
5626 ++Idx;
5627 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5628 llvm::Value *Size = CGF.Builder.CreateIntCast(
5629 CGF.getVLASize(
5630 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5631 .NumElts,
5632 CGF.SizeTy, /*isSigned=*/false);
5633 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5634 Elem);
5635 }
5636 }
5637
5638 // 2. Emit reduce_func().
5639 llvm::Function *ReductionFn = emitReductionFunction(
5640 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5641 LHSExprs, RHSExprs, ReductionOps);
5642
5643 // 3. Create static kmp_critical_name lock = { 0 };
5644 std::string Name = getName({"reduction"});
5645 llvm::Value *Lock = getCriticalRegionLock(Name);
5646
5647 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5648 // RedList, reduce_func, &<lock>);
5649 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5650 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5651 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5652 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5653 ReductionList.getPointer(), CGF.VoidPtrTy);
5654 llvm::Value *Args[] = {
5655 IdentTLoc, // ident_t *<loc>
5656 ThreadId, // i32 <gtid>
5657 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5658 ReductionArrayTySize, // size_type sizeof(RedList)
5659 RL, // void *RedList
5660 ReductionFn, // void (*) (void *, void *) <reduce_func>
5661 Lock // kmp_critical_name *&<lock>
5662 };
5663 llvm::Value *Res = CGF.EmitRuntimeCall(
5664 OMPBuilder.getOrCreateRuntimeFunction(
5665 CGM.getModule(),
5666 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5667 Args);
5668
5669 // 5. Build switch(res)
5670 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5671 llvm::SwitchInst *SwInst =
5672 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5673
5674 // 6. Build case 1:
5675 // ...
5676 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5677 // ...
5678 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5679 // break;
5680 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5681 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5682 CGF.EmitBlock(Case1BB);
5683
5684 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5685 llvm::Value *EndArgs[] = {
5686 IdentTLoc, // ident_t *<loc>
5687 ThreadId, // i32 <gtid>
5688 Lock // kmp_critical_name *&<lock>
5689 };
5690 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5691 CodeGenFunction &CGF, PrePostActionTy &Action) {
5692 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5693 auto IPriv = Privates.begin();
5694 auto ILHS = LHSExprs.begin();
5695 auto IRHS = RHSExprs.begin();
5696 for (const Expr *E : ReductionOps) {
5697 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5698 cast<DeclRefExpr>(*IRHS));
5699 ++IPriv;
5700 ++ILHS;
5701 ++IRHS;
5702 }
5703 };
5704 RegionCodeGenTy RCG(CodeGen);
5705 CommonActionTy Action(
5706 nullptr, llvm::None,
5707 OMPBuilder.getOrCreateRuntimeFunction(
5708 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5709 : OMPRTL___kmpc_end_reduce),
5710 EndArgs);
5711 RCG.setAction(Action);
5712 RCG(CGF);
5713
5714 CGF.EmitBranch(DefaultBB);
5715
5716 // 7. Build case 2:
5717 // ...
5718 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5719 // ...
5720 // break;
5721 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5722 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5723 CGF.EmitBlock(Case2BB);
5724
5725 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5726 CodeGenFunction &CGF, PrePostActionTy &Action) {
5727 auto ILHS = LHSExprs.begin();
5728 auto IRHS = RHSExprs.begin();
5729 auto IPriv = Privates.begin();
5730 for (const Expr *E : ReductionOps) {
5731 const Expr *XExpr = nullptr;
5732 const Expr *EExpr = nullptr;
5733 const Expr *UpExpr = nullptr;
5734 BinaryOperatorKind BO = BO_Comma;
5735 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5736 if (BO->getOpcode() == BO_Assign) {
5737 XExpr = BO->getLHS();
5738 UpExpr = BO->getRHS();
5739 }
5740 }
5741 // Try to emit update expression as a simple atomic.
5742 const Expr *RHSExpr = UpExpr;
5743 if (RHSExpr) {
5744 // Analyze RHS part of the whole expression.
5745 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5746 RHSExpr->IgnoreParenImpCasts())) {
5747 // If this is a conditional operator, analyze its condition for
5748 // min/max reduction operator.
5749 RHSExpr = ACO->getCond();
5750 }
5751 if (const auto *BORHS =
5752 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5753 EExpr = BORHS->getRHS();
5754 BO = BORHS->getOpcode();
5755 }
5756 }
5757 if (XExpr) {
5758 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5759 auto &&AtomicRedGen = [BO, VD,
5760 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5761 const Expr *EExpr, const Expr *UpExpr) {
5762 LValue X = CGF.EmitLValue(XExpr);
5763 RValue E;
5764 if (EExpr)
5765 E = CGF.EmitAnyExpr(EExpr);
5766 CGF.EmitOMPAtomicSimpleUpdateExpr(
5767 X, E, BO, /*IsXLHSInRHSPart=*/true,
5768 llvm::AtomicOrdering::Monotonic, Loc,
5769 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5770 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5771 PrivateScope.addPrivate(
5772 VD, [&CGF, VD, XRValue, Loc]() {
5773 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5774 CGF.emitOMPSimpleStore(
5775 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5776 VD->getType().getNonReferenceType(), Loc);
5777 return LHSTemp;
5778 });
5779 (void)PrivateScope.Privatize();
5780 return CGF.EmitAnyExpr(UpExpr);
5781 });
5782 };
5783 if ((*IPriv)->getType()->isArrayType()) {
5784 // Emit atomic reduction for array section.
5785 const auto *RHSVar =
5786 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5787 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5788 AtomicRedGen, XExpr, EExpr, UpExpr);
5789 } else {
5790 // Emit atomic reduction for array subscript or single variable.
5791 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5792 }
5793 } else {
5794 // Emit as a critical region.
5795 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5796 const Expr *, const Expr *) {
5797 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5798 std::string Name = RT.getName({"atomic_reduction"});
5799 RT.emitCriticalRegion(
5800 CGF, Name,
5801 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5802 Action.Enter(CGF);
5803 emitReductionCombiner(CGF, E);
5804 },
5805 Loc);
5806 };
5807 if ((*IPriv)->getType()->isArrayType()) {
5808 const auto *LHSVar =
5809 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5810 const auto *RHSVar =
5811 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5812 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5813 CritRedGen);
5814 } else {
5815 CritRedGen(CGF, nullptr, nullptr, nullptr);
5816 }
5817 }
5818 ++ILHS;
5819 ++IRHS;
5820 ++IPriv;
5821 }
5822 };
5823 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5824 if (!WithNowait) {
5825 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5826 llvm::Value *EndArgs[] = {
5827 IdentTLoc, // ident_t *<loc>
5828 ThreadId, // i32 <gtid>
5829 Lock // kmp_critical_name *&<lock>
5830 };
5831 CommonActionTy Action(nullptr, llvm::None,
5832 OMPBuilder.getOrCreateRuntimeFunction(
5833 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5834 EndArgs);
5835 AtomicRCG.setAction(Action);
5836 AtomicRCG(CGF);
5837 } else {
5838 AtomicRCG(CGF);
5839 }
5840
5841 CGF.EmitBranch(DefaultBB);
5842 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5843 }
5844
5845 /// Generates a unique name for artificial threadprivate variables.
5846 /// Format is: <Prefix> "." <Decl_mangled_name> "_" <Decl_start_loc_raw_enc>
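/// For a local variable 'x' and Prefix "reduction_size" this yields something
/// like "reduction_size.x_179" (a hypothetical raw location encoding).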
5847 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5848 const Expr *Ref) {
5849 SmallString<256> Buffer;
5850 llvm::raw_svector_ostream Out(Buffer);
5851 const clang::DeclRefExpr *DE;
5852 const VarDecl *D = ::getBaseDecl(Ref, DE);
5853 if (!D)
5854 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5855 D = D->getCanonicalDecl();
5856 std::string Name = CGM.getOpenMPRuntime().getName(
5857 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5858 Out << Prefix << Name << "_"
5859 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5860 return std::string(Out.str());
5861 }
5862
5863 /// Emits reduction initializer function:
5864 /// \code
5865 /// void @.red_init(void* %arg, void* %orig) {
5866 /// %0 = bitcast void* %arg to <type>*
5867 /// store <type> <init>, <type>* %0
5868 /// ret void
5869 /// }
5870 /// \endcode
5871 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5872 SourceLocation Loc,
5873 ReductionCodeGen &RCG, unsigned N) {
5874 ASTContext &C = CGM.getContext();
5875 QualType VoidPtrTy = C.VoidPtrTy;
5876 VoidPtrTy.addRestrict();
5877 FunctionArgList Args;
5878 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5879 ImplicitParamDecl::Other);
5880 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5881 ImplicitParamDecl::Other);
5882 Args.emplace_back(&Param);
5883 Args.emplace_back(&ParamOrig);
5884 const auto &FnInfo =
5885 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5886 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5887 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5888 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5889 Name, &CGM.getModule());
5890 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5891 Fn->setDoesNotRecurse();
5892 CodeGenFunction CGF(CGM);
5893 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5894 Address PrivateAddr = CGF.EmitLoadOfPointer(
5895 CGF.GetAddrOfLocalVar(&Param),
5896 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5897 llvm::Value *Size = nullptr;
5898 // If the size of the reduction item is non-constant, load it from the global
5899 // threadprivate variable.
5900 if (RCG.getSizes(N).second) {
5901 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5902 CGF, CGM.getContext().getSizeType(),
5903 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5904 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5905 CGM.getContext().getSizeType(), Loc);
5906 }
5907 RCG.emitAggregateType(CGF, N, Size);
5908 LValue OrigLVal;
5909 // If the initializer uses the initializer from a declare reduction construct,
5910 // emit a pointer to the address of the original reduction item (required by
5911 // the reduction initializer).
5912 if (RCG.usesReductionInitializer(N)) {
5913 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5914 SharedAddr = CGF.EmitLoadOfPointer(
5915 SharedAddr,
5916 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5917 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5918 } else {
5919 OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5920 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5921 CGM.getContext().VoidPtrTy);
5922 }
5923 // Emit the initializer:
5924 // %0 = bitcast void* %arg to <type>*
5925 // store <type> <init>, <type>* %0
5926 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5927 [](CodeGenFunction &) { return false; });
5928 CGF.FinishFunction();
5929 return Fn;
5930 }
5931
5932 /// Emits reduction combiner function:
5933 /// \code
5934 /// void @.red_comb(void* %arg0, void* %arg1) {
5935 /// %lhs = bitcast void* %arg0 to <type>*
5936 /// %rhs = bitcast void* %arg1 to <type>*
5937 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5938 /// store <type> %2, <type>* %lhs
5939 /// ret void
5940 /// }
5941 /// \endcode
5942 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5943 SourceLocation Loc,
5944 ReductionCodeGen &RCG, unsigned N,
5945 const Expr *ReductionOp,
5946 const Expr *LHS, const Expr *RHS,
5947 const Expr *PrivateRef) {
5948 ASTContext &C = CGM.getContext();
5949 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5950 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5951 FunctionArgList Args;
5952 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5953 C.VoidPtrTy, ImplicitParamDecl::Other);
5954 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5955 ImplicitParamDecl::Other);
5956 Args.emplace_back(&ParamInOut);
5957 Args.emplace_back(&ParamIn);
5958 const auto &FnInfo =
5959 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5960 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5961 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5962 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5963 Name, &CGM.getModule());
5964 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5965 Fn->setDoesNotRecurse();
5966 CodeGenFunction CGF(CGM);
5967 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5968 llvm::Value *Size = nullptr;
5969 // If the size of the reduction item is non-constant, load it from the global
5970 // threadprivate variable.
5971 if (RCG.getSizes(N).second) {
5972 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5973 CGF, CGM.getContext().getSizeType(),
5974 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5975 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5976 CGM.getContext().getSizeType(), Loc);
5977 }
5978 RCG.emitAggregateType(CGF, N, Size);
5979 // Remap lhs and rhs variables to the addresses of the function arguments.
5980 // %lhs = bitcast void* %arg0 to <type>*
5981 // %rhs = bitcast void* %arg1 to <type>*
5982 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5983 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5984 // Pull out the pointer to the variable.
5985 Address PtrAddr = CGF.EmitLoadOfPointer(
5986 CGF.GetAddrOfLocalVar(&ParamInOut),
5987 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5988 return CGF.Builder.CreateElementBitCast(
5989 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5990 });
5991 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5992 // Pull out the pointer to the variable.
5993 Address PtrAddr = CGF.EmitLoadOfPointer(
5994 CGF.GetAddrOfLocalVar(&ParamIn),
5995 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5996 return CGF.Builder.CreateElementBitCast(
5997 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5998 });
5999 PrivateScope.Privatize();
6000 // Emit the combiner body:
6001 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6002 // store <type> %2, <type>* %lhs
6003 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6004 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6005 cast<DeclRefExpr>(RHS));
6006 CGF.FinishFunction();
6007 return Fn;
6008 }
6009
6010 /// Emits reduction finalizer function:
6011 /// \code
6012 /// void @.red_fini(void* %arg) {
6013 /// %0 = bitcast void* %arg to <type>*
6014 /// <destroy>(<type>* %0)
6015 /// ret void
6016 /// }
6017 /// \endcode
6018 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6019 SourceLocation Loc,
6020 ReductionCodeGen &RCG, unsigned N) {
6021 if (!RCG.needCleanups(N))
6022 return nullptr;
6023 ASTContext &C = CGM.getContext();
6024 FunctionArgList Args;
6025 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6026 ImplicitParamDecl::Other);
6027 Args.emplace_back(&Param);
6028 const auto &FnInfo =
6029 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6030 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6031 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6032 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6033 Name, &CGM.getModule());
6034 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6035 Fn->setDoesNotRecurse();
6036 CodeGenFunction CGF(CGM);
6037 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6038 Address PrivateAddr = CGF.EmitLoadOfPointer(
6039 CGF.GetAddrOfLocalVar(&Param),
6040 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6041 llvm::Value *Size = nullptr;
6042 // If the size of the reduction item is non-constant, load it from the global
6043 // threadprivate variable.
6044 if (RCG.getSizes(N).second) {
6045 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6046 CGF, CGM.getContext().getSizeType(),
6047 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6048 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6049 CGM.getContext().getSizeType(), Loc);
6050 }
6051 RCG.emitAggregateType(CGF, N, Size);
6052 // Emit the finalizer body:
6053 // <destroy>(<type>* %0)
6054 RCG.emitCleanups(CGF, N, PrivateAddr);
6055 CGF.FinishFunction(Loc);
6056 return Fn;
6057 }
6058
6059 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6060 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6061 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6062 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6063 return nullptr;
6064
6065 // Build typedef struct:
6066 // kmp_taskred_input {
6067 // void *reduce_shar; // shared reduction item
6068 // void *reduce_orig; // original reduction item used for initialization
6069 // size_t reduce_size; // size of data item
6070 // void *reduce_init; // data initialization routine
6071 // void *reduce_fini; // data finalization routine
6072 // void *reduce_comb; // data combiner routine
6073 // kmp_task_red_flags_t flags; // flags for additional info from compiler
6074 // } kmp_taskred_input_t;
6075 ASTContext &C = CGM.getContext();
6076 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6077 RD->startDefinition();
6078 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6079 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6080 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6081 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6082 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6083 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6084 const FieldDecl *FlagsFD = addFieldToRecordDecl(
6085 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6086 RD->completeDefinition();
6087 QualType RDType = C.getRecordType(RD);
6088 unsigned Size = Data.ReductionVars.size();
6089 llvm::APInt ArraySize(/*numBits=*/64, Size);
6090 QualType ArrayRDType = C.getConstantArrayType(
6091 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6092 // kmp_taskred_input_t .rd_input.[Size];
6093 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6094 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6095 Data.ReductionCopies, Data.ReductionOps);
6096 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6097 // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
6098 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6099 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6100 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6101 TaskRedInput.getPointer(), Idxs,
6102 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6103 ".rd_input.gep.");
6104 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6105 // ElemLVal.reduce_shar = &Shareds[Cnt];
6106 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6107 RCG.emitSharedOrigLValue(CGF, Cnt);
6108 llvm::Value *CastedShared =
6109 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6110 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6111 // ElemLVal.reduce_orig = &Origs[Cnt];
6112 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6113 llvm::Value *CastedOrig =
6114 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6115 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6116 RCG.emitAggregateType(CGF, Cnt);
6117 llvm::Value *SizeValInChars;
6118 llvm::Value *SizeVal;
6119 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6120 // We use delayed creation/initialization for VLAs and array sections. It is
6121 // required because the runtime does not provide a way to pass the sizes of
6122 // VLAs/array sections to the initializer/combiner/finalizer functions. Instead,
6123 // threadprivate global variables are used to store these values, which are then
6124 // read back inside those functions.
6125 bool DelayedCreation = !!SizeVal;
6126 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6127 /*isSigned=*/false);
6128 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6129 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6130 // ElemLVal.reduce_init = init;
6131 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6132 llvm::Value *InitAddr =
6133 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6134 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6135 // ElemLVal.reduce_fini = fini;
6136 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6137 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6138 llvm::Value *FiniAddr = Fini
6139 ? CGF.EmitCastToVoidPtr(Fini)
6140 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6141 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6142 // ElemLVal.reduce_comb = comb;
6143 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6144 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6145 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6146 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6147 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6148 // ElemLVal.flags = 0;
6149 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6150 if (DelayedCreation) {
6151 CGF.EmitStoreOfScalar(
6152 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6153 FlagsLVal);
6154 } else
6155 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6156 FlagsLVal.getType());
6157 }
6158 if (Data.IsReductionWithTaskMod) {
6159 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6160 // is_ws, int num, void *data);
6161 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6162 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6163 CGM.IntTy, /*isSigned=*/true);
6164 llvm::Value *Args[] = {
6165 IdentTLoc, GTid,
6166 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6167 /*isSigned=*/true),
6168 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6169 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6170 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6171 return CGF.EmitRuntimeCall(
6172 OMPBuilder.getOrCreateRuntimeFunction(
6173 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6174 Args);
6175 }
6176 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6177 llvm::Value *Args[] = {
6178 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6179 /*isSigned=*/true),
6180 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6181 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6182 CGM.VoidPtrTy)};
6183 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6184 CGM.getModule(), OMPRTL___kmpc_taskred_init),
6185 Args);
6186 }
6187
6188 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6189 SourceLocation Loc,
6190 bool IsWorksharingReduction) {
6191 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
6192 // int is_ws);
6193 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6194 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6195 CGM.IntTy, /*isSigned=*/true);
6196 llvm::Value *Args[] = {IdentTLoc, GTid,
6197 llvm::ConstantInt::get(CGM.IntTy,
6198 IsWorksharingReduction ? 1 : 0,
6199 /*isSigned=*/true)};
6200 (void)CGF.EmitRuntimeCall(
6201 OMPBuilder.getOrCreateRuntimeFunction(
6202 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6203 Args);
6204 }
6205
6206 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6207 SourceLocation Loc,
6208 ReductionCodeGen &RCG,
6209 unsigned N) {
6210 auto Sizes = RCG.getSizes(N);
6211 // Emit the threadprivate global variable if the size of the reduction item is
6212 // non-constant (Sizes.second != nullptr holds the runtime size value).
6213 if (Sizes.second) {
6214 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6215 /*isSigned=*/false);
6216 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6217 CGF, CGM.getContext().getSizeType(),
6218 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6219 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6220 }
6221 }
6222
6223 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6224 SourceLocation Loc,
6225 llvm::Value *ReductionsPtr,
6226 LValue SharedLVal) {
6227 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6228 // *d);
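// The runtime returns the thread-specific copy of the reduction item, e.g.
// (illustrative):
//   void *priv = __kmpc_task_reduction_get_th_data(gtid, tg, &shared_item);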
6229 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6230 CGM.IntTy,
6231 /*isSigned=*/true),
6232 ReductionsPtr,
6233 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6234 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6235 return Address(
6236 CGF.EmitRuntimeCall(
6237 OMPBuilder.getOrCreateRuntimeFunction(
6238 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6239 Args),
6240 SharedLVal.getAlignment());
6241 }
6242
6243 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6244 SourceLocation Loc) {
6245 if (!CGF.HaveInsertPoint())
6246 return;
6247
6248 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6249 OMPBuilder.createTaskwait(CGF.Builder);
6250 } else {
6251 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6252 // global_tid);
6253 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6254 // Ignore return result until untied tasks are supported.
6255 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6256 CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6257 Args);
6258 }
6259
6260 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6261 Region->emitUntiedSwitch(CGF);
6262 }
6263
6264 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6265 OpenMPDirectiveKind InnerKind,
6266 const RegionCodeGenTy &CodeGen,
6267 bool HasCancel) {
6268 if (!CGF.HaveInsertPoint())
6269 return;
6270 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6271 InnerKind != OMPD_critical &&
6272 InnerKind != OMPD_master &&
6273 InnerKind != OMPD_masked);
6274 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6275 }
6276
6277 namespace {
6278 enum RTCancelKind {
6279 CancelNoreq = 0,
6280 CancelParallel = 1,
6281 CancelLoop = 2,
6282 CancelSections = 3,
6283 CancelTaskgroup = 4
6284 };
6285 } // anonymous namespace
6286
6287 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6288 RTCancelKind CancelKind = CancelNoreq;
6289 if (CancelRegion == OMPD_parallel)
6290 CancelKind = CancelParallel;
6291 else if (CancelRegion == OMPD_for)
6292 CancelKind = CancelLoop;
6293 else if (CancelRegion == OMPD_sections)
6294 CancelKind = CancelSections;
6295 else {
6296 assert(CancelRegion == OMPD_taskgroup);
6297 CancelKind = CancelTaskgroup;
6298 }
6299 return CancelKind;
6300 }
6301
6302 void CGOpenMPRuntime::emitCancellationPointCall(
6303 CodeGenFunction &CGF, SourceLocation Loc,
6304 OpenMPDirectiveKind CancelRegion) {
6305 if (!CGF.HaveInsertPoint())
6306 return;
6307 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6308 // global_tid, kmp_int32 cncl_kind);
6309 if (auto *OMPRegionInfo =
6310 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6311 // For 'cancellation point taskgroup', the task region info may not have a
6312 // cancel. This may instead happen in another adjacent task.
6313 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6314 llvm::Value *Args[] = {
6315 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6316 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6317 // Ignore return result until untied tasks are supported.
6318 llvm::Value *Result = CGF.EmitRuntimeCall(
6319 OMPBuilder.getOrCreateRuntimeFunction(
6320 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6321 Args);
6322 // if (__kmpc_cancellationpoint()) {
6323 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6324 // exit from construct;
6325 // }
6326 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6327 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6328 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6329 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6330 CGF.EmitBlock(ExitBB);
6331 if (CancelRegion == OMPD_parallel)
6332 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6333 // exit from construct;
6334 CodeGenFunction::JumpDest CancelDest =
6335 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6336 CGF.EmitBranchThroughCleanup(CancelDest);
6337 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6338 }
6339 }
6340 }
6341
6342 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6343 const Expr *IfCond,
6344 OpenMPDirectiveKind CancelRegion) {
6345 if (!CGF.HaveInsertPoint())
6346 return;
6347 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6348 // kmp_int32 cncl_kind);
6349 auto &M = CGM.getModule();
6350 if (auto *OMPRegionInfo =
6351 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6352 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6353 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6354 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6355 llvm::Value *Args[] = {
6356 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6357 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6358 // Ignore return result until untied tasks are supported.
6359 llvm::Value *Result = CGF.EmitRuntimeCall(
6360 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6361 // if (__kmpc_cancel()) {
6362 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6363 // exit from construct;
6364 // }
6365 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6366 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6367 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6368 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6369 CGF.EmitBlock(ExitBB);
6370 if (CancelRegion == OMPD_parallel)
6371 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6372 // exit from construct;
6373 CodeGenFunction::JumpDest CancelDest =
6374 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6375 CGF.EmitBranchThroughCleanup(CancelDest);
6376 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6377 };
6378 if (IfCond) {
6379 emitIfClause(CGF, IfCond, ThenGen,
6380 [](CodeGenFunction &, PrePostActionTy &) {});
6381 } else {
6382 RegionCodeGenTy ThenRCG(ThenGen);
6383 ThenRCG(CGF);
6384 }
6385 }
6386 }
6387
6388 namespace {
6389 /// Cleanup action for uses_allocators support.
6390 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6391 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6392
6393 public:
6394 OMPUsesAllocatorsActionTy(
6395 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6396 : Allocators(Allocators) {}
6397 void Enter(CodeGenFunction &CGF) override {
6398 if (!CGF.HaveInsertPoint())
6399 return;
6400 for (const auto &AllocatorData : Allocators) {
6401 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6402 CGF, AllocatorData.first, AllocatorData.second);
6403 }
6404 }
6405 void Exit(CodeGenFunction &CGF) override {
6406 if (!CGF.HaveInsertPoint())
6407 return;
6408 for (const auto &AllocatorData : Allocators) {
6409 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6410 AllocatorData.first);
6411 }
6412 }
6413 };
6414 } // namespace
6415
6416 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6417 const OMPExecutableDirective &D, StringRef ParentName,
6418 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6419 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6420 assert(!ParentName.empty() && "Invalid target region parent name!");
6421 HasEmittedTargetRegion = true;
6422 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6423 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6424 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6425 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6426 if (!D.AllocatorTraits)
6427 continue;
6428 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6429 }
6430 }
6431 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6432 CodeGen.setAction(UsesAllocatorAction);
6433 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6434 IsOffloadEntry, CodeGen);
6435 }
6436
6437 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6438 const Expr *Allocator,
6439 const Expr *AllocatorTraits) {
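// This emits, roughly (an illustrative sketch; variable names are for
// exposition only):
//   omp_allocator_handle_t A =
//       __kmpc_init_allocator(gtid, /*memspace=*/nullptr, ntraits, traits);
//   <allocator-variable> = A;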
6440 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6441 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6442 // Use default memspace handle.
6443 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6444 llvm::Value *NumTraits = llvm::ConstantInt::get(
6445 CGF.IntTy, cast<ConstantArrayType>(
6446 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6447 ->getSize()
6448 .getLimitedValue());
6449 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6450 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6451 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6452 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6453 AllocatorTraitsLVal.getBaseInfo(),
6454 AllocatorTraitsLVal.getTBAAInfo());
6455 llvm::Value *Traits =
6456 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6457
6458 llvm::Value *AllocatorVal =
6459 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6460 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6461 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6462 // Store to allocator.
6463 CGF.EmitVarDecl(*cast<VarDecl>(
6464 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6465 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6466 AllocatorVal =
6467 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6468 Allocator->getType(), Allocator->getExprLoc());
6469 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6470 }
6471
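// Conceptual shape of the initialization emitted above (illustrative):
//
//   my_alloc = (allocator-type)__kmpc_init_allocator(
//       tid, /*memspace=*/NULL, /*ntraits=*/N, traits);
//
// where N is the constant element count of the traits array and the result
// is converted from void* to the declared allocator type before the store.
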
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }
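  // For example (values illustrative), a target region at line 42 of a file
  // with device ID 0x10 and file ID 0xabcd12, enclosed in a function with
  // mangled name _Z3foov, would be named:
  //
  //   __omp_offloading_10_abcd12__Z3foov_l42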

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep it alive, and could therefore inline the
  // host function if proven worthwhile during optimization. On the other
  // hand, if emitting code for the device, the ID has to be the function
  // address so that it can be retrieved from the offloading entry and
  // launched by the runtime library. We also mark the outlined function to
  // have external linkage in case we are emitting code for the device,
  // because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}

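// For example (assumed source), for
//
//   #pragma omp target teams num_teams(4) thread_limit(64)
//
// with constant clause operands, the outlined function gets the attributes
// "omp_target_num_teams"="4" and "omp_target_thread_limit"="64".
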
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

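// For example (illustrative), '2 + 3' and a plain read of a non-volatile
// variable are trivial, while a call to an arbitrary user function or an
// expression with side effects such as '++x' is not.
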
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot return the single child.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

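// For example (illustrative), given the body { ; x + 1; S; } the null
// statement and the trivial expression 'x + 1' are skipped and S is returned
// as the single child; a second non-ignorable statement would make this
// function return nullptr.
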
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to signal that no teams region has to be emitted.
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

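// Illustrative results (assumed source):
//   #pragma omp target teams num_teams(8)  -> returns the num_teams
//     expression and, since it is constant, sets DefaultVal = 8;
//   #pragma omp target parallel            -> returns nullptr, DefaultVal = 1;
//   #pragma omp target (no nested teams)   -> returns nullptr, DefaultVal = -1.
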
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}

static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the 'if' clause. If present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause only if the 'if' clause was
      // not specified or did not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process the condition of the 'if' clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

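// Illustrative result (assumed source): for a captured
//
//   #pragma omp parallel if(c) num_threads(n)
//
// the helper emits, conceptually, c ? min(DefaultThreadLimitVal, n) : 1,
// where min() is realized by the icmp ult + select pair above and is only
// applied when a default thread limit value is provided.
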
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // A bare 'target' region has no thread_limit clause.
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

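// Illustrative result (assumed source): for
//
//   #pragma omp target parallel thread_limit(8) num_threads(4)
//
// DefaultVal is first set to 8 from thread_limit; the constant num_threads
// value 4 is smaller, so DefaultVal becomes 4 and the num_threads expression
// is returned.
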
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the 'if' clause. If present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                   ThreadLimitVal),
                                 NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

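// Conceptual value computed above (illustrative; assumes the runtime treats
// 0 as "use the default number of threads"): for
//
//   #pragma omp target parallel if(c) thread_limit(tl) num_threads(nt)
//
// the emitted value is c ? (nt < tl ? nt : tl) : 1, built from the
// icmp ult/select pair, with 0 returned when neither clause is present.
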
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags.
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

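  // Worked example (derived from the constants above): the lowest set bit of
  // OMP_MAP_MEMBER_OF (0xffff000000000000) is bit 48, so
  // getFlagMemberOffset() returns 48 and MEMBER_OF(n) is encoded as
  // ((uint64_t)n) << 48; e.g. MEMBER_OF(1) == 0x0001000000000000.
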
  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

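  // Illustrative sizes computed above (assuming 'int a[10]'):
  //   a[2:5] -> 5 * sizeof(int)                (explicit length)
  //   a[3:]  -> sizeof(a) - 3 * sizeof(int)    (clamped to 0 by the select)
  //   a[:]   -> sizeof(a)                      (whole base)
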
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) !=
        MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) !=
        MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
            MapModifiers.end() ||
        llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
            MotionModifiers.end())
      Bits |= OMP_MAP_PRESENT;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

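  // Worked example (derived from the flag values above): an explicit
  //   map(always, close, tofrom: x)
  // yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE
  //   = 0x1 | 0x2 | 0x4 | 0x400 = 0x407,
  // plus OMP_MAP_TARGET_PARAM (0x20) when the entry is also passed to the
  // kernel as an argument.
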
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

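  // Illustrative classification (assuming 'int a[10]; int n;'):
  //   a[2]   -> false (not an array section)
  //   a[2:1] -> false (constant length 1)
  //   a[2:n] -> true  (length not provably 1)
  //   a[2:]  -> true  (dimension size 10 != 1)
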
7617 /// Generate the base pointers, section pointers, sizes, map type bits, and
7618 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7619 /// map type, map or motion modifiers, and expression components.
7620 /// \a IsFirstComponent should be set to true if the provided set of
7621 /// components is the first associated with a capture.
generateInfoForComponentList(OpenMPMapClauseKind MapType,ArrayRef<OpenMPMapModifierKind> MapModifiers,ArrayRef<OpenMPMotionModifierKind> MotionModifiers,OMPClauseMappableExprCommon::MappableExprComponentListRef Components,MapCombinedInfoTy & CombinedInfo,StructRangeInfoTy & PartialStruct,bool IsFirstComponentList,bool IsImplicit,const ValueDecl * Mapper=nullptr,bool ForDeviceAddr=false,const ValueDecl * BaseDecl=nullptr,const Expr * MapExpr=nullptr,ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedElements=llvm::None) const7622 void generateInfoForComponentList(
7623 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7624 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7625 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7626 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7627 bool IsFirstComponentList, bool IsImplicit,
7628 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7629 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7630 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7631 OverlappedElements = llvm::None) const {
7632 // The following summarizes what has to be generated for each map and the
7633 // types below. The generated information is expressed in this order:
7634 // base pointer, section pointer, size, flags
7635 // (to add to the ones that come from the map type and modifier).
7636 //
7637 // double d;
7638 // int i[100];
7639 // float *p;
7640 //
7641 // struct S1 {
7642 // int i;
7643 // float f[50];
7644 // }
7645 // struct S2 {
7646 // int i;
7647 // float f[50];
7648 // S1 s;
7649 // double *p;
7650 // struct S2 *ps;
7651 // int &ref;
7652 // }
7653 // S2 s;
7654 // S2 *ps;
7655 //
7656 // map(d)
7657 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7658 //
7659 // map(i)
7660 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7661 //
7662 // map(i[1:23])
7663 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7664 //
7665 // map(p)
7666 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7667 //
7668 // map(p[1:24])
7669 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7670 // in unified shared memory mode or for local pointers
7671 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7672 //
7673 // map(s)
7674 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7675 //
7676 // map(s.i)
7677 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7678 //
7679 // map(s.s.f)
7680 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7681 //
7682 // map(s.p)
7683 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7684 //
7685 // map(to: s.p[:22])
7686 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7687 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7688 // &(s.p), &(s.p[0]), 22*sizeof(double),
7689 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7690 // (*) alloc space for struct members, only this is a target parameter
7691 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7692 // optimizes this entry out, same in the examples below)
7693 // (***) map the pointee (map: to)
7694 //
7695 // map(to: s.ref)
7696 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7697 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7698 // (*) alloc space for struct members, only this is a target parameter
7699 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7700 // optimizes this entry out, same in the examples below)
7701 // (***) map the pointee (map: to)
7702 //
7703 // map(s.ps)
7704 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7705 //
7706 // map(from: s.ps->s.i)
7707 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7708 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7709 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7710 //
7711 // map(to: s.ps->ps)
7712 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7713 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7714 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7715 //
7716 // map(s.ps->ps->ps)
7717 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7718 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7719 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7720 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7721 //
7722 // map(to: s.ps->ps->s.f[:22])
7723 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7724 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7725 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7726 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7727 //
7728 // map(ps)
7729 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7730 //
7731 // map(ps->i)
7732 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7733 //
7734 // map(ps->s.f)
7735 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7736 //
7737 // map(from: ps->p)
7738 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7739 //
7740 // map(to: ps->p[:22])
7741 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7742 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7743 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7744 //
7745 // map(ps->ps)
7746 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7747 //
7748 // map(from: ps->ps->s.i)
7749 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7750 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7751 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7752 //
7753 // map(from: ps->ps->ps)
7754 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7755 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7756 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7757 //
7758 // map(ps->ps->ps->ps)
7759 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7760 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7761 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7762 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7763 //
7764 // map(to: ps->ps->ps->s.f[:22])
7765 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7766 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7767 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7768 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7769 //
7770 // map(to: s.f[:22]) map(from: s.p[:33])
7771 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7772 // sizeof(double*) (**), TARGET_PARAM
7773 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7774 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7775 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7776 // (*) allocate contiguous space needed to fit all mapped members even if
7777 // we allocate space for members not mapped (in this example,
7778 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7779 // them as well because they fall between &s.f[0] and &s.p)
7780 //
7781 // map(from: s.f[:22]) map(to: ps->p[:33])
7782 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7783 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7784 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7785 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7786 // (*) the struct this entry pertains to is the 2nd element in the list of
7787 // arguments, hence MEMBER_OF(2)
7788 //
7789 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7790 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7791 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7792 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7793 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7794 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7795 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7796 // (*) the struct this entry pertains to is the 4th element in the list
7797 // of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }
      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2), which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
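      // For illustration (hypothetical sections, not taken from the spec
      // text): given `int arr[10]`, the section `arr[3:1]` has a provable
      // length of one and is not final, while `arr[3:n]` is a final array
      // section.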
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. We have to treat
      // array sections specially, given that they are built-in types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                                    CGF.getContext().getTypeAlignInChars(
                                        OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
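        // Illustrative sketch: in `map(to: ps->p[:22])` from the examples at
        // the top of this function, the member pointer component `ps->p`
        // matches EncounteredME, so no standalone entry is emitted for it; it
        // is folded into the PTR_AND_OBJ entry for the pointee section.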
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list without encountering a
    // member expression, the complete record is mapped, so allocate space for
    // the whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we initialize the first dimension
    // size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // on the next iteration.
        if (ElementType) {
          // When the base is a pointer, we need to remove one level of
          // indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value, except for the last dimension, since we
      // don't need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous updates. Notice that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the numbers of offsets, counts, and strides are not the same as
    // those of pointers, base_pointers, sizes, or dims. Instead, they match
    // the number of non-contiguous declarations in the target update to/from
    // clause.
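    // A hypothetical example: for `#pragma omp target update to(arr[0:2:2])`,
    // the loop below appends a single (offset, count, stride) triple for the
    // section, in addition to the dummy dimension initialized above.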
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once an outer dimension is an array section, all the
        // inner dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not in array-section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
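          // E.g. (an assumed case): for `int a[10]` and the section `a[2:]`,
          // with no length and no stride, the count computed below defaults
          // to size - lower-bound, i.e. 10 - 2 = 8 elements.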
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //          Offset Count Stride
      //    D0      0      1     4    (int)    <- dummy dimension
      //    D1      0      2     8    (2 * (1) * 4)
      //    D2      1      2     20   (1 * (1 * 5) * 4)
      //    D3      0      2     200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first-private variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the
    // captured declaration is known as first-private in this handler.
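    // Illustrative sketch (hypothetical clause): for `firstprivate(p)` with
    // `int *p`, this returns TO | PTR_AND_OBJ; a non-pointer first-private
    // variable gets PRIVATE | TO instead.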
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
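    // E.g., Position 0 (the first argument of a combined entry) encodes
    // MEMBER_OF(1), i.e. the value 1 shifted into the MEMBER_OF bit-field.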
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases
            // where the base pointer is mapped as an array subscript, array
            // section or array shaping. The base address is passed as a
            // pointer to base in this case and cannot be used as a base for
            // use_device_ptr list items.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information, so we generate a
        // zero-size array section; if the pointer is a struct member, we
        // defer this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information, so we generate a
        // zero-size array section; if the pointer is a struct member, we
        // defer this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ |
                                    OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
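    // For illustration: in the `map(to: s.f[:22]) map(from: s.p[:33])`
    // example near the top of this file, this routine emits the combined
    // entry `&s, &(s.f[0]), <size spanning s.f through s.p>`; the per-member
    // entries are then tagged MEMBER_OF(1).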
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM if we generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove the TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set the correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
9056
9057 /// Generate the base pointers, section pointers, sizes, map types, and
9058 /// mappers associated to a given capture (all included in \a CombinedInfo).
generateInfoForCapture(const CapturedStmt::Capture * Cap,llvm::Value * Arg,MapCombinedInfoTy & CombinedInfo,StructRangeInfoTy & PartialStruct) const9059 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9060 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9061 StructRangeInfoTy &PartialStruct) const {
9062 assert(!Cap->capturesVariableArrayType() &&
9063 "Not expecting to generate map info for a variable array type!");
9064
9065 // We need to know when we generating information for the first component
9066 const ValueDecl *VD = Cap->capturesThis()
9067 ? nullptr
9068 : Cap->getCapturedVar()->getCanonicalDecl();
9069
9070 // If this declaration appears in a is_device_ptr clause we just have to
9071 // pass the pointer by value. If it is a reference to a declaration, we just
9072 // pass its value.
9073 if (DevPointersMap.count(VD)) {
9074 CombinedInfo.Exprs.push_back(VD);
9075 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9076 CombinedInfo.Pointers.push_back(Arg);
9077 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9078 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9079 /*isSigned=*/true));
9080 CombinedInfo.Types.push_back(
9081 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9082 OMP_MAP_TARGET_PARAM);
9083 CombinedInfo.Mappers.push_back(nullptr);
9084 return;
9085 }
9086
9087 using MapData =
9088 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9089 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9090 const ValueDecl *, const Expr *>;
9091 SmallVector<MapData, 4> DeclComponentLists;
9092 assert(CurDir.is<const OMPExecutableDirective *>() &&
9093 "Expect a executable directive");
9094 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9095 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9096 const auto *EI = C->getVarRefs().begin();
9097 for (const auto L : C->decl_component_lists(VD)) {
9098 const ValueDecl *VDecl, *Mapper;
9099 // The expression is not valid if the mapping is implicit.
9100 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9101 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9102 std::tie(VDecl, Components, Mapper) = L;
9103 assert(VDecl == VD && "We got information for the wrong declaration??");
9104 assert(!Components.empty() &&
9105 "Not expecting declaration with no component lists.");
9106 DeclComponentLists.emplace_back(Components, C->getMapType(),
9107 C->getMapTypeModifiers(),
9108 C->isImplicit(), Mapper, E);
9109 ++EI;
9110 }
9111 }
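// Sort the lists so that entries with the 'present' map modifier are
// processed first. For example (hypothetical clauses), given
//   map(tofrom: s.x) map(present, to: s)
// the entry for 'map(present, to: s)' is moved ahead, so the presence of
// the whole struct is checked before the member mapping is emitted.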
9112 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9113 const MapData &RHS) {
9114 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9115 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9116 bool HasPresent = !MapModifiers.empty() &&
9117 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9118 return K == clang::OMPC_MAP_MODIFIER_present;
9119 });
9120 bool HasAllocs = MapType == OMPC_MAP_alloc;
9121 MapModifiers = std::get<2>(RHS);
9122 MapType = std::get<1>(LHS);
9123 bool HasPresentR =
9124 !MapModifiers.empty() &&
9125 llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9126 return K == clang::OMPC_MAP_MODIFIER_present;
9127 });
9128 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9129 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9130 });
9131
9132 // Find overlapping elements (including the offset from the base element).
9133 llvm::SmallDenseMap<
9134 const MapData *,
9135 llvm::SmallVector<
9136 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9137 4>
9138 OverlappedData;
9139 size_t Count = 0;
9140 for (const MapData &L : DeclComponentLists) {
9141 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9142 OpenMPMapClauseKind MapType;
9143 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9144 bool IsImplicit;
9145 const ValueDecl *Mapper;
9146 const Expr *VarRef;
9147 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9148 L;
9149 ++Count;
9150 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9151 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9152 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9153 VarRef) = L1;
9154 auto CI = Components.rbegin();
9155 auto CE = Components.rend();
9156 auto SI = Components1.rbegin();
9157 auto SE = Components1.rend();
9158 for (; CI != CE && SI != SE; ++CI, ++SI) {
9159 if (CI->getAssociatedExpression()->getStmtClass() !=
9160 SI->getAssociatedExpression()->getStmtClass())
9161 break;
9162 // Are we dealing with different variables/fields?
9163 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9164 break;
9165 }
9166 // We found an overlap if, for at least one of the two lists, we reached
9167 // the head of its components list.
9168 if (CI == CE || SI == SE) {
9169 // Ignore it if it is the same component.
9170 if (CI == CE && SI == SE)
9171 continue;
9172 const auto It = (SI == SE) ? CI : SI;
9173 // If one component is a pointer and the other is a kind of
9174 // dereference of this pointer (array subscript, section, dereference,
9175 // etc.), it is not an overlap.
9176 // Likewise if one component is a base and the other is a dereferenced
9177 // pointer MemberExpr with the same base.
9178 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9179 (std::prev(It)->getAssociatedDeclaration() &&
9180 std::prev(It)
9181 ->getAssociatedDeclaration()
9182 ->getType()
9183 ->isPointerType()) ||
9184 (It->getAssociatedDeclaration() &&
9185 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9186 std::next(It) != CE && std::next(It) != SE))
9187 continue;
9188 const MapData &BaseData = CI == CE ? L : L1;
9189 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9190 SI == SE ? Components : Components1;
9191 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9192 OverlappedElements.getSecond().push_back(SubData);
9193 }
9194 }
9195 }
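// For example (hypothetical clauses, with 'struct S { int a; double b; } s'):
//   map(tofrom: s) map(from: s.b)
// produces overlapping component lists: the list for 's' is a proper prefix
// of the list for 's.b', so 's' becomes the base entry and the list for
// 's.b' is recorded among its overlapped elements.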
9196 // Sort the overlapped elements for each item.
9197 llvm::SmallVector<const FieldDecl *, 4> Layout;
9198 if (!OverlappedData.empty()) {
9199 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9200 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9201 while (BaseType != OrigType) {
9202 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9203 OrigType = BaseType->getPointeeOrArrayElementType();
9204 }
9205
9206 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9207 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9208 else {
9209 const auto *RD = BaseType->getAsRecordDecl();
9210 Layout.append(RD->field_begin(), RD->field_end());
9211 }
9212 }
9213 for (auto &Pair : OverlappedData) {
9214 llvm::stable_sort(
9215 Pair.getSecond(),
9216 [&Layout](
9217 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9218 OMPClauseMappableExprCommon::MappableExprComponentListRef
9219 Second) {
9220 auto CI = First.rbegin();
9221 auto CE = First.rend();
9222 auto SI = Second.rbegin();
9223 auto SE = Second.rend();
9224 for (; CI != CE && SI != SE; ++CI, ++SI) {
9225 if (CI->getAssociatedExpression()->getStmtClass() !=
9226 SI->getAssociatedExpression()->getStmtClass())
9227 break;
9228 // Are we dealing with different variables/fields?
9229 if (CI->getAssociatedDeclaration() !=
9230 SI->getAssociatedDeclaration())
9231 break;
9232 }
9233
9234 // Lists contain the same elements.
9235 if (CI == CE && SI == SE)
9236 return false;
9237
9238 // List with less elements is less than list with more elements.
9239 if (CI == CE || SI == SE)
9240 return CI == CE;
9241
9242 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9243 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9244 if (FD1->getParent() == FD2->getParent())
9245 return FD1->getFieldIndex() < FD2->getFieldIndex();
9246 const auto *It =
9247 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9248 return FD == FD1 || FD == FD2;
9249 });
9250 return *It == FD1;
9251 });
9252 }
9253
9254 // This information is associated with a capture, since the mapping flags
9255 // depend on it. First, handle all of the elements that have overlaps.
9256 bool IsFirstComponentList = true;
9257 for (const auto &Pair : OverlappedData) {
9258 const MapData &L = *Pair.getFirst();
9259 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9260 OpenMPMapClauseKind MapType;
9261 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9262 bool IsImplicit;
9263 const ValueDecl *Mapper;
9264 const Expr *VarRef;
9265 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9266 L;
9267 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9268 OverlappedComponents = Pair.getSecond();
9269 generateInfoForComponentList(
9270 MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9271 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9272 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9273 IsFirstComponentList = false;
9274 }
9275 // Go through other elements without overlapped elements.
9276 for (const MapData &L : DeclComponentLists) {
9277 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9278 OpenMPMapClauseKind MapType;
9279 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9280 bool IsImplicit;
9281 const ValueDecl *Mapper;
9282 const Expr *VarRef;
9283 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9284 L;
9285 auto It = OverlappedData.find(&L);
9286 if (It == OverlappedData.end())
9287 generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9288 Components, CombinedInfo, PartialStruct,
9289 IsFirstComponentList, IsImplicit, Mapper,
9290 /*ForDeviceAddr=*/false, VD, VarRef);
9291 IsFirstComponentList = false;
9292 }
9293 }
9294
9295 /// Generate the default map information for a given capture \a CI,
9296 /// record field declaration \a RI and captured value \a CV.
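/// A minimal sketch of the defaults (hypothetical captures):
/// \code
/// int x; struct S { int a; } s;
/// #pragma omp target
/// { x++; s.a++; }
/// \endcode
/// 'x' is captured by copy and mapped as OMP_MAP_LITERAL, while 's',
/// captured by reference, gets the aggregate default 'tofrom'; both entries
/// additionally carry OMP_MAP_TARGET_PARAM and OMP_MAP_IMPLICIT.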
9297 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9298 const FieldDecl &RI, llvm::Value *CV,
9299 MapCombinedInfoTy &CombinedInfo) const {
9300 bool IsImplicit = true;
9301 // Do the default mapping.
9302 if (CI.capturesThis()) {
9303 CombinedInfo.Exprs.push_back(nullptr);
9304 CombinedInfo.BasePointers.push_back(CV);
9305 CombinedInfo.Pointers.push_back(CV);
9306 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9307 CombinedInfo.Sizes.push_back(
9308 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9309 CGF.Int64Ty, /*isSigned=*/true));
9310 // Default map type.
9311 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9312 } else if (CI.capturesVariableByCopy()) {
9313 const VarDecl *VD = CI.getCapturedVar();
9314 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9315 CombinedInfo.BasePointers.push_back(CV);
9316 CombinedInfo.Pointers.push_back(CV);
9317 if (!RI.getType()->isAnyPointerType()) {
9318 // We have to signal to the runtime which captures are passed by value
9319 // and are not pointers.
9320 CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9321 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9322 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9323 } else {
9324 // Pointers are implicitly mapped with a zero size and no flags (other
9325 // than the flags added below for every implicit map).
9326 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9327 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9328 }
9329 auto I = FirstPrivateDecls.find(VD);
9330 if (I != FirstPrivateDecls.end())
9331 IsImplicit = I->getSecond();
9332 } else {
9333 assert(CI.capturesVariable() && "Expected captured reference.");
9334 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9335 QualType ElementType = PtrTy->getPointeeType();
9336 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9337 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9338 // The default map type for a scalar/complex type is 'to' because by
9339 // default the value doesn't have to be retrieved. For an aggregate
9340 // type, the default is 'tofrom'.
9341 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9342 const VarDecl *VD = CI.getCapturedVar();
9343 auto I = FirstPrivateDecls.find(VD);
9344 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9345 CombinedInfo.BasePointers.push_back(CV);
9346 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9347 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9348 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9349 AlignmentSource::Decl));
9350 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9351 } else {
9352 CombinedInfo.Pointers.push_back(CV);
9353 }
9354 if (I != FirstPrivateDecls.end())
9355 IsImplicit = I->getSecond();
9356 }
9357 // Every default map produces a single argument which is a target parameter.
9358 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9359
9360 // Add flag stating this is an implicit map.
9361 if (IsImplicit)
9362 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9363
9364 // No user-defined mapper for default mapping.
9365 CombinedInfo.Mappers.push_back(nullptr);
9366 }
9367 };
9368 } // anonymous namespace
9369
9370 static void emitNonContiguousDescriptor(
9371 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9372 CGOpenMPRuntime::TargetDataInfo &Info) {
9373 CodeGenModule &CGM = CGF.CGM;
9374 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9375 &NonContigInfo = CombinedInfo.NonContigInfo;
9376
9377 // Build an array of struct descriptor_dim and then assign it to
9378 // offload_args.
9379 //
9380 // struct descriptor_dim {
9381 // uint64_t offset;
9382 // uint64_t count;
9383 // uint64_t stride
9384 // };
9385 ASTContext &C = CGF.getContext();
9386 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9387 RecordDecl *RD;
9388 RD = C.buildImplicitRecord("descriptor_dim");
9389 RD->startDefinition();
9390 addFieldToRecordDecl(C, RD, Int64Ty);
9391 addFieldToRecordDecl(C, RD, Int64Ty);
9392 addFieldToRecordDecl(C, RD, Int64Ty);
9393 RD->completeDefinition();
9394 QualType DimTy = C.getRecordType(RD);
9395
9396 enum { OffsetFD = 0, CountFD, StrideFD };
9397 // We need two index variables here since the size of "Dims" is the same as
9398 // the size of Components; however, the sizes of offset, count, and stride
9399 // are equal to the number of base declarations that are non-contiguous.
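// Illustrative sketch (hypothetical map): for 'map(tofrom: a[0:2][0:4:2])'
// on 'int a[3][8]', Dims for this base declaration is 2, and the loop below
// materializes one {offset, count, stride} descriptor per dimension,
// writing the dimensions in reverse order via RevIdx.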
9400 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9401 // Skip emitting IR if the dimension size is 1, since such a dimension
9402 // cannot be non-contiguous.
9403 if (NonContigInfo.Dims[I] == 1)
9404 continue;
9405 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9406 QualType ArrayTy =
9407 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9408 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9409 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9410 unsigned RevIdx = EE - II - 1;
9411 LValue DimsLVal = CGF.MakeAddrLValue(
9412 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9413 // Offset
9414 LValue OffsetLVal = CGF.EmitLValueForField(
9415 DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9416 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9417 // Count
9418 LValue CountLVal = CGF.EmitLValueForField(
9419 DimsLVal, *std::next(RD->field_begin(), CountFD));
9420 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9421 // Stride
9422 LValue StrideLVal = CGF.EmitLValueForField(
9423 DimsLVal, *std::next(RD->field_begin(), StrideFD));
9424 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9425 }
9426 // args[I] = &dims
9427 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9428 DimsAddr, CGM.Int8PtrTy);
9429 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9430 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9431 Info.PointersArray, 0, I);
9432 Address PAddr(P, CGF.getPointerAlign());
9433 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9434 ++L;
9435 }
9436 }
9437
9438 /// Emit a string constant containing the names of the values mapped to the
9439 /// offloading runtime library.
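/// For example, a map of 'arr[0:N]' declared at line 10, column 7 of foo.c
/// is expected to produce a constant of the usual ident-string form
/// ";foo.c;arr[0:N];10;7;;" (as built by getOrCreateSrcLocStr).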
9440 llvm::Constant *
9441 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9442 MappableExprsHandler::MappingExprInfo &MapExprs) {
9443 llvm::Constant *SrcLocStr;
9444 if (!MapExprs.getMapDecl()) {
9445 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9446 } else {
9447 std::string ExprName = "";
9448 if (MapExprs.getMapExpr()) {
9449 PrintingPolicy P(CGF.getContext().getLangOpts());
9450 llvm::raw_string_ostream OS(ExprName);
9451 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9452 OS.flush();
9453 } else {
9454 ExprName = MapExprs.getMapDecl()->getNameAsString();
9455 }
9456
9457 SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9458 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9459 const char *FileName = PLoc.getFilename();
9460 unsigned Line = PLoc.getLine();
9461 unsigned Column = PLoc.getColumn();
9462 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9463 Line, Column);
9464 }
9465 return SrcLocStr;
9466 }
9467
9468 /// Emit the arrays used to pass the captures and map information to the
9469 /// offloading runtime library. If there is no map or capture information,
9470 /// return nullptr by reference.
9471 static void emitOffloadingArrays(
9472 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9473 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9474 bool IsNonContiguous = false) {
9475 CodeGenModule &CGM = CGF.CGM;
9476 ASTContext &Ctx = CGF.getContext();
9477
9478 // Reset the array information.
9479 Info.clearArrayInfo();
9480 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9481
9482 if (Info.NumberOfPtrs) {
9483 // Detect if we have any capture size requiring runtime evaluation of the
9484 // size, so that a constant array can eventually be used.
9485 bool hasRuntimeEvaluationCaptureSize = false;
9486 for (llvm::Value *S : CombinedInfo.Sizes)
9487 if (!isa<llvm::Constant>(S)) {
9488 hasRuntimeEvaluationCaptureSize = true;
9489 break;
9490 }
9491
9492 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9493 QualType PointerArrayType = Ctx.getConstantArrayType(
9494 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9495 /*IndexTypeQuals=*/0);
9496
9497 Info.BasePointersArray =
9498 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9499 Info.PointersArray =
9500 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9501 Address MappersArray =
9502 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9503 Info.MappersArray = MappersArray.getPointer();
9504
9505 // If we don't have any VLA types or other types that require runtime
9506 // evaluation, we can use a constant array for the map sizes, otherwise we
9507 // need to fill up the arrays as we do for the pointers.
9508 QualType Int64Ty =
9509 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9510 if (hasRuntimeEvaluationCaptureSize) {
9511 QualType SizeArrayType = Ctx.getConstantArrayType(
9512 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9513 /*IndexTypeQuals=*/0);
9514 Info.SizesArray =
9515 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9516 } else {
9517 // We expect all the sizes to be constant, so we collect them to create
9518 // a constant array.
9519 SmallVector<llvm::Constant *, 16> ConstSizes;
9520 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9521 if (IsNonContiguous &&
9522 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9523 ConstSizes.push_back(llvm::ConstantInt::get(
9524 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9525 } else {
9526 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9527 }
9528 }
9529
9530 auto *SizesArrayInit = llvm::ConstantArray::get(
9531 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9532 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9533 auto *SizesArrayGbl = new llvm::GlobalVariable(
9534 CGM.getModule(), SizesArrayInit->getType(),
9535 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9536 SizesArrayInit, Name);
9537 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9538 Info.SizesArray = SizesArrayGbl;
9539 }
9540
9541 // The map types are always constant so we don't need to generate code to
9542 // fill arrays. Instead, we create an array constant.
9543 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9544 llvm::copy(CombinedInfo.Types, Mapping.begin());
9545 std::string MaptypesName =
9546 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9547 auto *MapTypesArrayGbl =
9548 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9549 Info.MapTypesArray = MapTypesArrayGbl;
9550
9551 // The names of the mapped values are only built if debug information is
9552 // requested.
9553 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9554 Info.MapNamesArray = llvm::Constant::getNullValue(
9555 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9556 } else {
9557 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9558 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9559 };
9560 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9561 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9562 std::string MapnamesName =
9563 CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9564 auto *MapNamesArrayGbl =
9565 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9566 Info.MapNamesArray = MapNamesArrayGbl;
9567 }
9568
9569 // If there's a present map type modifier, it must not be applied to the end
9570 // of a region, so generate a separate map type array in that case.
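// For example, with 'map(present, to: x)' on a 'target data' construct, the
// begin call keeps OMP_MAP_PRESENT so a missing mapping is diagnosed on
// entry, while the separate end array drops the bit so no presence check
// runs when the region is exited.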
9571 if (Info.separateBeginEndCalls()) {
9572 bool EndMapTypesDiffer = false;
9573 for (uint64_t &Type : Mapping) {
9574 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9575 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9576 EndMapTypesDiffer = true;
9577 }
9578 }
9579 if (EndMapTypesDiffer) {
9580 MapTypesArrayGbl =
9581 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9582 Info.MapTypesArrayEnd = MapTypesArrayGbl;
9583 }
9584 }
9585
9586 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9587 llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9588 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9589 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9590 Info.BasePointersArray, 0, I);
9591 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9592 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9593 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9594 CGF.Builder.CreateStore(BPVal, BPAddr);
9595
9596 if (Info.requiresDevicePointerInfo())
9597 if (const ValueDecl *DevVD =
9598 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9599 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9600
9601 llvm::Value *PVal = CombinedInfo.Pointers[I];
9602 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9603 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9604 Info.PointersArray, 0, I);
9605 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9606 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9607 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9608 CGF.Builder.CreateStore(PVal, PAddr);
9609
9610 if (hasRuntimeEvaluationCaptureSize) {
9611 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9612 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9613 Info.SizesArray,
9614 /*Idx0=*/0,
9615 /*Idx1=*/I);
9616 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9617 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9618 CGM.Int64Ty,
9619 /*isSigned=*/true),
9620 SAddr);
9621 }
9622
9623 // Fill up the mapper array.
9624 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9625 if (CombinedInfo.Mappers[I]) {
9626 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9627 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9628 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9629 Info.HasMapper = true;
9630 }
9631 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9632 CGF.Builder.CreateStore(MFunc, MAddr);
9633 }
9634 }
9635
9636 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9637 Info.NumberOfPtrs == 0)
9638 return;
9639
9640 emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9641 }
9642
9643 namespace {
9644 /// Additional arguments for emitOffloadingArraysArgument function.
9645 struct ArgumentsOptions {
9646 bool ForEndCall = false;
9647 ArgumentsOptions() = default;
9648 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
9649 };
9650 } // namespace
9651
9652 /// Emit the arguments to be passed to the runtime library based on the
9653 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
9654 /// ForEndCall, emit map types to be passed for the end of the region instead of
9655 /// the beginning.
9656 static void emitOffloadingArraysArgument(
9657 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9658 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9659 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9660 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9661 const ArgumentsOptions &Options = ArgumentsOptions()) {
9662 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9663 "expected region end call to runtime only when end call is separate");
9664 CodeGenModule &CGM = CGF.CGM;
9665 if (Info.NumberOfPtrs) {
9666 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9667 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9668 Info.BasePointersArray,
9669 /*Idx0=*/0, /*Idx1=*/0);
9670 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9671 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9672 Info.PointersArray,
9673 /*Idx0=*/0,
9674 /*Idx1=*/0);
9675 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9676 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9677 /*Idx0=*/0, /*Idx1=*/0);
9678 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9679 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9680 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9681 : Info.MapTypesArray,
9682 /*Idx0=*/0,
9683 /*Idx1=*/0);
9684
9685 // Only emit the map names array argument if debug information is
9686 // requested.
9687 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9688 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9689 else
9690 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9691 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9692 Info.MapNamesArray,
9693 /*Idx0=*/0,
9694 /*Idx1=*/0);
9695 // If there is no user-defined mapper, set the mapper array to nullptr to
9696 // avoid an unnecessary data privatization.
9697 if (!Info.HasMapper)
9698 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9699 else
9700 MappersArrayArg =
9701 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9702 } else {
9703 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9704 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9705 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9706 MapTypesArrayArg =
9707 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9708 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9709 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9710 }
9711 }
9712
9713 /// Check for inner distribute directive.
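/// For example (hypothetical source):
/// \code
/// #pragma omp target
/// #pragma omp teams
/// #pragma omp distribute
/// for (int I = 0; I < N; ++I) ...
/// \endcode
/// returns the 'distribute' directive, found through the nested 'teams'
/// region, while 'target parallel'-style directives yield nullptr.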
9714 static const OMPExecutableDirective *
9715 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9716 const auto *CS = D.getInnermostCapturedStmt();
9717 const auto *Body =
9718 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9719 const Stmt *ChildStmt =
9720 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9721
9722 if (const auto *NestedDir =
9723 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9724 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9725 switch (D.getDirectiveKind()) {
9726 case OMPD_target:
9727 if (isOpenMPDistributeDirective(DKind))
9728 return NestedDir;
9729 if (DKind == OMPD_teams) {
9730 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9731 /*IgnoreCaptured=*/true);
9732 if (!Body)
9733 return nullptr;
9734 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9735 if (const auto *NND =
9736 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9737 DKind = NND->getDirectiveKind();
9738 if (isOpenMPDistributeDirective(DKind))
9739 return NND;
9740 }
9741 }
9742 return nullptr;
9743 case OMPD_target_teams:
9744 if (isOpenMPDistributeDirective(DKind))
9745 return NestedDir;
9746 return nullptr;
9747 case OMPD_target_parallel:
9748 case OMPD_target_simd:
9749 case OMPD_target_parallel_for:
9750 case OMPD_target_parallel_for_simd:
9751 return nullptr;
9752 case OMPD_target_teams_distribute:
9753 case OMPD_target_teams_distribute_simd:
9754 case OMPD_target_teams_distribute_parallel_for:
9755 case OMPD_target_teams_distribute_parallel_for_simd:
9756 case OMPD_parallel:
9757 case OMPD_for:
9758 case OMPD_parallel_for:
9759 case OMPD_parallel_master:
9760 case OMPD_parallel_sections:
9761 case OMPD_for_simd:
9762 case OMPD_parallel_for_simd:
9763 case OMPD_cancel:
9764 case OMPD_cancellation_point:
9765 case OMPD_ordered:
9766 case OMPD_threadprivate:
9767 case OMPD_allocate:
9768 case OMPD_task:
9769 case OMPD_simd:
9770 case OMPD_tile:
9771 case OMPD_unroll:
9772 case OMPD_sections:
9773 case OMPD_section:
9774 case OMPD_single:
9775 case OMPD_master:
9776 case OMPD_critical:
9777 case OMPD_taskyield:
9778 case OMPD_barrier:
9779 case OMPD_taskwait:
9780 case OMPD_taskgroup:
9781 case OMPD_atomic:
9782 case OMPD_flush:
9783 case OMPD_depobj:
9784 case OMPD_scan:
9785 case OMPD_teams:
9786 case OMPD_target_data:
9787 case OMPD_target_exit_data:
9788 case OMPD_target_enter_data:
9789 case OMPD_distribute:
9790 case OMPD_distribute_simd:
9791 case OMPD_distribute_parallel_for:
9792 case OMPD_distribute_parallel_for_simd:
9793 case OMPD_teams_distribute:
9794 case OMPD_teams_distribute_simd:
9795 case OMPD_teams_distribute_parallel_for:
9796 case OMPD_teams_distribute_parallel_for_simd:
9797 case OMPD_target_update:
9798 case OMPD_declare_simd:
9799 case OMPD_declare_variant:
9800 case OMPD_begin_declare_variant:
9801 case OMPD_end_declare_variant:
9802 case OMPD_declare_target:
9803 case OMPD_end_declare_target:
9804 case OMPD_declare_reduction:
9805 case OMPD_declare_mapper:
9806 case OMPD_taskloop:
9807 case OMPD_taskloop_simd:
9808 case OMPD_master_taskloop:
9809 case OMPD_master_taskloop_simd:
9810 case OMPD_parallel_master_taskloop:
9811 case OMPD_parallel_master_taskloop_simd:
9812 case OMPD_requires:
9813 case OMPD_unknown:
9814 default:
9815 llvm_unreachable("Unexpected directive.");
9816 }
9817 }
9818
9819 return nullptr;
9820 }
9821
9822 /// Emit the user-defined mapper function. The code generation follows the
9823 /// pattern in the example below.
9824 /// \code
9825 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9826 /// void *base, void *begin,
9827 /// int64_t size, int64_t type,
9828 /// void *name = nullptr) {
9829 /// // Allocate space for an array section first or add a base/begin for
9830 /// // pointer dereference.
9831 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9832 /// !maptype.IsDelete)
9833 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9834 /// size*sizeof(Ty), clearToFromMember(type));
9835 /// // Map members.
9836 /// for (unsigned i = 0; i < size; i++) {
9837 /// // For each component specified by this mapper:
9838 /// for (auto c : begin[i]->all_components) {
9839 /// if (c.hasMapper())
9840 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9841 /// c.arg_type, c.arg_name);
9842 /// else
9843 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9844 /// c.arg_begin, c.arg_size, c.arg_type,
9845 /// c.arg_name);
9846 /// }
9847 /// }
9848 /// // Delete the array section.
9849 /// if (size > 1 && maptype.IsDelete)
9850 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9851 /// size*sizeof(Ty), clearToFromMember(type));
9852 /// }
9853 /// \endcode
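/// A hypothetical mapper this is generated for (illustrative only):
/// \code
/// struct S { int Len; double *Data; };
/// #pragma omp declare mapper(id : S V) map(V, V.Data[0:V.Len])
/// \endcode
/// Each array element visited by the loop pushes the components named in the
/// mapper's map clauses, recursing into nested mappers where present.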
9854 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9855 CodeGenFunction *CGF) {
9856 if (UDMMap.count(D) > 0)
9857 return;
9858 ASTContext &C = CGM.getContext();
9859 QualType Ty = D->getType();
9860 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9861 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9862 auto *MapperVarDecl =
9863 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9864 SourceLocation Loc = D->getLocation();
9865 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9866
9867 // Prepare mapper function arguments and attributes.
9868 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9869 C.VoidPtrTy, ImplicitParamDecl::Other);
9870 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9871 ImplicitParamDecl::Other);
9872 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9873 C.VoidPtrTy, ImplicitParamDecl::Other);
9874 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9875 ImplicitParamDecl::Other);
9876 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9877 ImplicitParamDecl::Other);
9878 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9879 ImplicitParamDecl::Other);
9880 FunctionArgList Args;
9881 Args.push_back(&HandleArg);
9882 Args.push_back(&BaseArg);
9883 Args.push_back(&BeginArg);
9884 Args.push_back(&SizeArg);
9885 Args.push_back(&TypeArg);
9886 Args.push_back(&NameArg);
9887 const CGFunctionInfo &FnInfo =
9888 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9889 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9890 SmallString<64> TyStr;
9891 llvm::raw_svector_ostream Out(TyStr);
9892 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9893 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9894 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9895 Name, &CGM.getModule());
9896 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9897 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9898 // Start the mapper function code generation.
9899 CodeGenFunction MapperCGF(CGM);
9900 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9901 // Compute the starting and end addresses of array elements.
9902 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9903 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9904 C.getPointerType(Int64Ty), Loc);
9905 // Prepare common arguments for array initialization and deletion.
9906 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9907 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9908 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9909 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9910 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9911 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9912 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9913 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9914 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9915 // Convert the size in bytes into the number of array elements.
9916 Size = MapperCGF.Builder.CreateExactUDiv(
9917 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9918 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9919 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9920 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
9921 PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
9922 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9923 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9924 C.getPointerType(Int64Ty), Loc);
9925 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9926 MapperCGF.GetAddrOfLocalVar(&NameArg),
9927 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9928
9929 // Emit array initialization if this is an array section and \p MapType
9930 // indicates that memory allocation is required.
9931 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9932 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9933 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9934
9935 // Emit a for loop to iterate through SizeArg elements and map all of them.
9936
9937 // Emit the loop header block.
9938 MapperCGF.EmitBlock(HeadBB);
9939 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9940 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9941 // Evaluate whether the initial condition is satisfied.
9942 llvm::Value *IsEmpty =
9943 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9944 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9945 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9946
9947 // Emit the loop body block.
9948 MapperCGF.EmitBlock(BodyBB);
9949 llvm::BasicBlock *LastBB = BodyBB;
9950 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9951 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9952 PtrPHI->addIncoming(PtrBegin, EntryBB);
9953 Address PtrCurrent =
9954 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9955 .getAlignment()
9956 .alignmentOfArrayElement(ElementSize));
9957 // Privatize the declared variable of mapper to be the current array element.
9958 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9959 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9960 (void)Scope.Privatize();
9961
9962 // Get map clause information. Fill up the arrays with all mapped variables.
9963 MappableExprsHandler::MapCombinedInfoTy Info;
9964 MappableExprsHandler MEHandler(*D, MapperCGF);
9965 MEHandler.generateAllInfoForMapper(Info);
9966
9967 // Call the runtime API __tgt_mapper_num_components to get the number of
9968 // pre-existing components.
9969 llvm::Value *OffloadingArgs[] = {Handle};
9970 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9971 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9972 OMPRTL___tgt_mapper_num_components),
9973 OffloadingArgs);
9974 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9975 PreviousSize,
9976 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9977
9978 // Fill up the runtime mapper handle for all components.
9979 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9980 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9981 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9982 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9983 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9984 llvm::Value *CurSizeArg = Info.Sizes[I];
9985 llvm::Value *CurNameArg =
9986 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9987 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9988 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9989
9990 // Extract the MEMBER_OF field from the map type.
9991 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9992 llvm::Value *MemberMapType =
9993 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9994
9995 // Combine the map type inherited from user-defined mapper with that
9996 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9997 // bits of the \a MapType, which is the input argument of the mapper
9998 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9999 // bits of MemberMapType.
10000 // [OpenMP 5.0], 1.2.6. map-type decay.
10001 // | alloc | to | from | tofrom | release | delete
10002 // ----------------------------------------------------------
10003 // alloc | alloc | alloc | alloc | alloc | release | delete
10004 // to | alloc | to | alloc | to | release | delete
10005 // from | alloc | alloc | from | from | release | delete
10006 // tofrom | alloc | to | from | tofrom | release | delete
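// For example, if the mapper declares a member with 'map(to: ...)' but the
// enclosing construct maps the object 'from', the table decays the member's
// effective map type to 'alloc' (row 'to', column 'from').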
10007 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10008 MapType,
10009 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10010 MappableExprsHandler::OMP_MAP_FROM));
10011 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10012 llvm::BasicBlock *AllocElseBB =
10013 MapperCGF.createBasicBlock("omp.type.alloc.else");
10014 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10015 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10016 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10017 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10018 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10019 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10020 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10021 MapperCGF.EmitBlock(AllocBB);
10022 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10023 MemberMapType,
10024 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10025 MappableExprsHandler::OMP_MAP_FROM)));
10026 MapperCGF.Builder.CreateBr(EndBB);
10027 MapperCGF.EmitBlock(AllocElseBB);
10028 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10029 LeftToFrom,
10030 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10031 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10032 // In case of to, clear OMP_MAP_FROM.
10033 MapperCGF.EmitBlock(ToBB);
10034 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10035 MemberMapType,
10036 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10037 MapperCGF.Builder.CreateBr(EndBB);
10038 MapperCGF.EmitBlock(ToElseBB);
10039 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10040 LeftToFrom,
10041 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10042 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10043 // In case of from, clear OMP_MAP_TO.
10044 MapperCGF.EmitBlock(FromBB);
10045 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10046 MemberMapType,
10047 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10048 // In case of tofrom, do nothing.
10049 MapperCGF.EmitBlock(EndBB);
10050 LastBB = EndBB;
10051 llvm::PHINode *CurMapType =
10052 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10053 CurMapType->addIncoming(AllocMapType, AllocBB);
10054 CurMapType->addIncoming(ToMapType, ToBB);
10055 CurMapType->addIncoming(FromMapType, FromBB);
10056 CurMapType->addIncoming(MemberMapType, ToElseBB);
10057
10058 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
10059 CurSizeArg, CurMapType, CurNameArg};
10060 if (Info.Mappers[I]) {
10061 // Call the corresponding mapper function.
10062 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10063 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10064 assert(MapperFunc && "Expect a valid mapper function to be available.");
10065 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10066 } else {
10067 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10068 // data structure.
10069 MapperCGF.EmitRuntimeCall(
10070 OMPBuilder.getOrCreateRuntimeFunction(
10071 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10072 OffloadingArgs);
10073 }
10074 }
10075
10076 // Update the pointer to point to the next element that needs to be mapped,
10077 // and check whether we have mapped all elements.
10078 llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10079 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10080 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10081 PtrPHI->addIncoming(PtrNext, LastBB);
10082 llvm::Value *IsDone =
10083 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10084 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10085 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10086
10087 MapperCGF.EmitBlock(ExitBB);
10088 // Emit array deletion if this is an array section and \p MapType indicates
10089 // that deletion is required.
10090 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10091 MapName, ElementSize, DoneBB, /*IsInit=*/false);
10092
10093 // Emit the function exit block.
10094 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10095 MapperCGF.FinishFunction();
10096 UDMMap.try_emplace(D, Fn);
10097 if (CGF) {
10098 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10099 Decls.second.push_back(D);
10100 }
10101 }
10102
10103 /// Emit the array initialization or deletion portion for user-defined mapper
10104 /// code generation. First, it evaluates whether an array section is mapped and
10105 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10106 /// true, and \a MapType indicates to not delete this array, array
10107 /// initialization code is generated. If \a IsInit is false, and \a MapType
10108 /// indicates to delete this array, array deletion code is generated.
10109 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10110 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10111 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10112 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10113 bool IsInit) {
10114 StringRef Prefix = IsInit ? ".init" : ".del";
10115
10116 // Evaluate if this is an array section.
10117 llvm::BasicBlock *BodyBB =
10118 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10119 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10120 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10121 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10122 MapType,
10123 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10124 llvm::Value *DeleteCond;
10125 llvm::Value *Cond;
10126 if (IsInit) {
10127 // base != begin?
10128 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
10129 MapperCGF.Builder.CreatePtrDiff(Base, Begin));
10130 // IsPtrAndObj?
10131 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10132 MapType,
10133 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10134 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10135 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10136 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10137 DeleteCond = MapperCGF.Builder.CreateIsNull(
10138 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10139 } else {
10140 Cond = IsArray;
10141 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10142 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10143 }
10144 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10145 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10146
10147 MapperCGF.EmitBlock(BodyBB);
10148 // Get the array size by multiplying element size and element number (i.e., \p
10149 // Size).
10150 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10151 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10152 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10153 // memory allocation/deletion purpose only.
10154 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10155 MapType,
10156 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10157 MappableExprsHandler::OMP_MAP_FROM)));
10158 MapTypeArg = MapperCGF.Builder.CreateOr(
10159 MapTypeArg,
10160 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10161
10162 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10163 // data structure.
10164 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
10165 ArraySize, MapTypeArg, MapName};
10166 MapperCGF.EmitRuntimeCall(
10167 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10168 OMPRTL___tgt_push_mapper_component),
10169 OffloadingArgs);
10170 }
10171
10172 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10173 const OMPDeclareMapperDecl *D) {
10174 auto I = UDMMap.find(D);
10175 if (I != UDMMap.end())
10176 return I->second;
10177 emitUserDefinedMapper(D);
10178 return UDMMap.lookup(D);
10179 }
10180
10181 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10182 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10183 llvm::Value *DeviceID,
10184 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10185 const OMPLoopDirective &D)>
10186 SizeEmitter) {
10187 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10188 const OMPExecutableDirective *TD = &D;
10189 // Get nested teams distribute kind directive, if any.
10190 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10191 TD = getNestedDistributeDirective(CGM.getContext(), D);
10192 if (!TD)
10193 return;
10194 const auto *LD = cast<OMPLoopDirective>(TD);
10195 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10196 PrePostActionTy &) {
10197 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10198 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10199 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10200 CGF.EmitRuntimeCall(
10201 OMPBuilder.getOrCreateRuntimeFunction(
10202 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10203 Args);
10204 }
10205 };
10206 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10207 }
10208
10209 void CGOpenMPRuntime::emitTargetCall(
10210 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10211 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10212 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10213 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10214 const OMPLoopDirective &D)>
10215 SizeEmitter) {
10216 if (!CGF.HaveInsertPoint())
10217 return;
10218
10219 assert(OutlinedFn && "Invalid outlined function!");
10220
10221 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10222 D.hasClausesOfKind<OMPNowaitClause>();
10223 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10224 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10225 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10226 PrePostActionTy &) {
10227 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10228 };
10229 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10230
10231 CodeGenFunction::OMPTargetDataInfo InputInfo;
10232 llvm::Value *MapTypesArray = nullptr;
10233 llvm::Value *MapNamesArray = nullptr;
10234 // Fill up the pointer arrays and transfer execution to the device.
10235 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10236 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10237 &CapturedVars,
10238 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10239 if (Device.getInt() == OMPC_DEVICE_ancestor) {
10240 // Reverse offloading is not supported, so just execute on the host.
10241 if (RequiresOuterTask) {
10242 CapturedVars.clear();
10243 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10244 }
10245 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10246 return;
10247 }
10248
10249 // On top of the arrays that were filled up, the target offloading call
10250 // takes as arguments the device id as well as the host pointer. The host
10251 // pointer is used by the runtime library to identify the current target
10252 // region, so it only has to be unique and not necessarily point to
10253 // anything. It could be the pointer to the outlined function that
10254 // implements the target region, but we aren't using that, so that the
10255 // compiler doesn't need to keep it alive and can therefore inline the host
10256 // function if proven worthwhile during optimization.
10257
10258 // From this point on, we need to have an ID of the target region defined.
10259 assert(OutlinedFnID && "Invalid outlined function ID!");
10260
10261 // Emit device ID if any.
10262 llvm::Value *DeviceID;
10263 if (Device.getPointer()) {
10264 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10265 Device.getInt() == OMPC_DEVICE_device_num) &&
10266 "Expected device_num modifier.");
10267 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10268 DeviceID =
10269 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10270 } else {
10271 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10272 }
10273
10274 // Emit the number of elements in the offloading arrays.
10275 llvm::Value *PointerNum =
10276 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10277
10278 // Return value of the runtime offloading call.
10279 llvm::Value *Return;
10280
10281 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10282 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10283
10284 // Source location for the ident struct
10285 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10286
10287 // Emit tripcount for the target loop-based directive.
10288 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10289
10290 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10291 // The target region is an outlined function launched by the runtime
10292 // via calls __tgt_target() or __tgt_target_teams().
10293 //
10294 // __tgt_target() launches a target region with one team and one thread,
10295 // executing a serial region. This master thread may in turn launch
10296 // more threads within its team upon encountering a parallel region,
10297 // however, no additional teams can be launched on the device.
10298 //
10299 // __tgt_target_teams() launches a target region with one or more teams,
10300 // each with one or more threads. This call is required for target
10301 // constructs such as:
10302 // 'target teams'
10303 // 'target' / 'teams'
10304 // 'target teams distribute parallel for'
10305 // 'target parallel'
10306 // and so on.
10307 //
10308 // Note that on the host and CPU targets, the runtime implementation of
10309 // these calls simply calls the outlined function without forking threads.
10310 // The outlined functions themselves have runtime calls to
10311 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10312 // the compiler in emitTeamsCall() and emitParallelCall().
10313 //
10314 // In contrast, on the NVPTX target, the implementation of
10315 // __tgt_target_teams() launches a GPU kernel with the requested number
10316 // of teams and threads so no additional calls to the runtime are required.
10317 if (NumTeams) {
10318 // If we have NumTeams defined this means that we have an enclosed teams
10319 // region. Therefore we also expect to have NumThreads defined. These two
10320 // values should be defined in the presence of a teams directive,
10321 // regardless of whether any clauses are associated with it. If the user
10322 // uses teams but no clauses, these two values will be the defaults that
10323 // should be passed to the runtime library: a 32-bit integer of value zero.
10324 assert(NumThreads && "Thread limit expression should be available along "
10325 "with number of teams.");
10326 SmallVector<llvm::Value *> OffloadingArgs = {
10327 RTLoc,
10328 DeviceID,
10329 OutlinedFnID,
10330 PointerNum,
10331 InputInfo.BasePointersArray.getPointer(),
10332 InputInfo.PointersArray.getPointer(),
10333 InputInfo.SizesArray.getPointer(),
10334 MapTypesArray,
10335 MapNamesArray,
10336 InputInfo.MappersArray.getPointer(),
10337 NumTeams,
10338 NumThreads};
10339 if (HasNowait) {
10340 // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10341 // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10342 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10343 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10344 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10345 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10346 }
10347 Return = CGF.EmitRuntimeCall(
10348 OMPBuilder.getOrCreateRuntimeFunction(
10349 CGM.getModule(), HasNowait
10350 ? OMPRTL___tgt_target_teams_nowait_mapper
10351 : OMPRTL___tgt_target_teams_mapper),
10352 OffloadingArgs);
10353 } else {
10354 SmallVector<llvm::Value *> OffloadingArgs = {
10355 RTLoc,
10356 DeviceID,
10357 OutlinedFnID,
10358 PointerNum,
10359 InputInfo.BasePointersArray.getPointer(),
10360 InputInfo.PointersArray.getPointer(),
10361 InputInfo.SizesArray.getPointer(),
10362 MapTypesArray,
10363 MapNamesArray,
10364 InputInfo.MappersArray.getPointer()};
10365 if (HasNowait) {
10366 // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10367 // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10368 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10369 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10370 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10371 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10372 }
10373 Return = CGF.EmitRuntimeCall(
10374 OMPBuilder.getOrCreateRuntimeFunction(
10375 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10376 : OMPRTL___tgt_target_mapper),
10377 OffloadingArgs);
10378 }
10379
10380 // Check the error code and execute the host version if required.
10381 llvm::BasicBlock *OffloadFailedBlock =
10382 CGF.createBasicBlock("omp_offload.failed");
10383 llvm::BasicBlock *OffloadContBlock =
10384 CGF.createBasicBlock("omp_offload.cont");
10385 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10386 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10387
10388 CGF.EmitBlock(OffloadFailedBlock);
10389 if (RequiresOuterTask) {
10390 CapturedVars.clear();
10391 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10392 }
10393 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10394 CGF.EmitBranch(OffloadContBlock);
10395
10396 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10397 };
10398
10399 // Notify that the host version must be executed.
10400 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10401 RequiresOuterTask](CodeGenFunction &CGF,
10402 PrePostActionTy &) {
10403 if (RequiresOuterTask) {
10404 CapturedVars.clear();
10405 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10406 }
10407 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10408 };
10409
10410 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10411 &MapNamesArray, &CapturedVars, RequiresOuterTask,
10412 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10413 // Fill up the arrays with all the captured variables.
10414 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10415
10416 // Get mappable expression information.
10417 MappableExprsHandler MEHandler(D, CGF);
10418 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10419 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10420
10421 auto RI = CS.getCapturedRecordDecl()->field_begin();
10422 auto *CV = CapturedVars.begin();
10423 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10424 CE = CS.capture_end();
10425 CI != CE; ++CI, ++RI, ++CV) {
10426 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10427 MappableExprsHandler::StructRangeInfoTy PartialStruct;
10428
10429 // VLA sizes are passed to the outlined region by copy and do not have map
10430 // information associated.
10431 if (CI->capturesVariableArrayType()) {
10432 CurInfo.Exprs.push_back(nullptr);
10433 CurInfo.BasePointers.push_back(*CV);
10434 CurInfo.Pointers.push_back(*CV);
10435 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10436 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10437 // Copy to the device as an argument. No need to retrieve it.
10438 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10439 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10440 MappableExprsHandler::OMP_MAP_IMPLICIT);
10441 CurInfo.Mappers.push_back(nullptr);
10442 } else {
10443 // If we have any information in the map clause, we use it; otherwise we
10444 // just do a default mapping.
10445 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10446 if (!CI->capturesThis())
10447 MappedVarSet.insert(CI->getCapturedVar());
10448 else
10449 MappedVarSet.insert(nullptr);
10450 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10451 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10452 // Generate correct mapping for variables captured by reference in
10453 // lambdas.
10454 if (CI->capturesVariable())
10455 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10456 CurInfo, LambdaPointers);
10457 }
10458 // We expect to have at least one element of information for this capture.
10459 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10460 "Non-existing map pointer for capture!");
10461 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10462 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10463 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10464 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10465 "Inconsistent map information sizes!");
10466
10467 // If there is an entry in PartialStruct it means we have a struct with
10468 // individual members mapped. Emit an extra combined entry.
10469 if (PartialStruct.Base.isValid()) {
10470 CombinedInfo.append(PartialStruct.PreliminaryMapData);
10471 MEHandler.emitCombinedEntry(
10472 CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10473 !PartialStruct.PreliminaryMapData.BasePointers.empty());
10474 }
10475
10476 // We need to append the results of this capture to what we already have.
10477 CombinedInfo.append(CurInfo);
10478 }
10479 // Adjust MEMBER_OF flags for the lambdas captures.
10480 MEHandler.adjustMemberOfForLambdaCaptures(
10481 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10482 CombinedInfo.Types);
10483 // Map any list items in a map clause that were not captured because they
10484 // weren't referenced within the construct.
10485 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10486
10487 TargetDataInfo Info;
10488 // Fill up the arrays and create the arguments.
10489 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10490 emitOffloadingArraysArgument(
10491 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10492 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10493 {/*ForEndTask=*/false});
10494
10495 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10496 InputInfo.BasePointersArray =
10497 Address(Info.BasePointersArray, CGM.getPointerAlign());
10498 InputInfo.PointersArray =
10499 Address(Info.PointersArray, CGM.getPointerAlign());
10500 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10501 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10502 MapTypesArray = Info.MapTypesArray;
10503 MapNamesArray = Info.MapNamesArray;
10504 if (RequiresOuterTask)
10505 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10506 else
10507 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10508 };
10509
10510 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10511 CodeGenFunction &CGF, PrePostActionTy &) {
10512 if (RequiresOuterTask) {
10513 CodeGenFunction::OMPTargetDataInfo InputInfo;
10514 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10515 } else {
10516 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10517 }
10518 };
10519
10520 // If we have a target function ID it means that we need to support
10521 // offloading; otherwise, just execute on the host. We need to execute on
10522 // the host regardless of the conditional in the if clause if, e.g., the
10523 // user does not specify target triples.
10524 if (OutlinedFnID) {
10525 if (IfCond) {
10526 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10527 } else {
10528 RegionCodeGenTy ThenRCG(TargetThenGen);
10529 ThenRCG(CGF);
10530 }
10531 } else {
10532 RegionCodeGenTy ElseRCG(TargetElseGen);
10533 ElseRCG(CGF);
10534 }
10535 }
10536
10537 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10538 StringRef ParentName) {
10539 if (!S)
10540 return;
10541
10542 // Codegen OMP target directives that offload compute to the device.
10543 bool RequiresDeviceCodegen =
10544 isa<OMPExecutableDirective>(S) &&
10545 isOpenMPTargetExecutionDirective(
10546 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10547
10548 if (RequiresDeviceCodegen) {
10549 const auto &E = *cast<OMPExecutableDirective>(S);
10550 unsigned DeviceID;
10551 unsigned FileID;
10552 unsigned Line;
10553 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10554 FileID, Line);
10555
10556 // Is this a target region that should not be emitted as an entry point?
10557 // If so, just signal that we are done with this target region.
10558 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10559 ParentName, Line))
10560 return;
10561
10562 switch (E.getDirectiveKind()) {
10563 case OMPD_target:
10564 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10565 cast<OMPTargetDirective>(E));
10566 break;
10567 case OMPD_target_parallel:
10568 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10569 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10570 break;
10571 case OMPD_target_teams:
10572 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10573 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10574 break;
10575 case OMPD_target_teams_distribute:
10576 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10577 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10578 break;
10579 case OMPD_target_teams_distribute_simd:
10580 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10581 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10582 break;
10583 case OMPD_target_parallel_for:
10584 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10585 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10586 break;
10587 case OMPD_target_parallel_for_simd:
10588 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10589 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10590 break;
10591 case OMPD_target_simd:
10592 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10593 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10594 break;
10595 case OMPD_target_teams_distribute_parallel_for:
10596 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10597 CGM, ParentName,
10598 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10599 break;
10600 case OMPD_target_teams_distribute_parallel_for_simd:
10601 CodeGenFunction::
10602 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10603 CGM, ParentName,
10604 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10605 break;
10606 case OMPD_parallel:
10607 case OMPD_for:
10608 case OMPD_parallel_for:
10609 case OMPD_parallel_master:
10610 case OMPD_parallel_sections:
10611 case OMPD_for_simd:
10612 case OMPD_parallel_for_simd:
10613 case OMPD_cancel:
10614 case OMPD_cancellation_point:
10615 case OMPD_ordered:
10616 case OMPD_threadprivate:
10617 case OMPD_allocate:
10618 case OMPD_task:
10619 case OMPD_simd:
10620 case OMPD_tile:
10621 case OMPD_unroll:
10622 case OMPD_sections:
10623 case OMPD_section:
10624 case OMPD_single:
10625 case OMPD_master:
10626 case OMPD_critical:
10627 case OMPD_taskyield:
10628 case OMPD_barrier:
10629 case OMPD_taskwait:
10630 case OMPD_taskgroup:
10631 case OMPD_atomic:
10632 case OMPD_flush:
10633 case OMPD_depobj:
10634 case OMPD_scan:
10635 case OMPD_teams:
10636 case OMPD_target_data:
10637 case OMPD_target_exit_data:
10638 case OMPD_target_enter_data:
10639 case OMPD_distribute:
10640 case OMPD_distribute_simd:
10641 case OMPD_distribute_parallel_for:
10642 case OMPD_distribute_parallel_for_simd:
10643 case OMPD_teams_distribute:
10644 case OMPD_teams_distribute_simd:
10645 case OMPD_teams_distribute_parallel_for:
10646 case OMPD_teams_distribute_parallel_for_simd:
10647 case OMPD_target_update:
10648 case OMPD_declare_simd:
10649 case OMPD_declare_variant:
10650 case OMPD_begin_declare_variant:
10651 case OMPD_end_declare_variant:
10652 case OMPD_declare_target:
10653 case OMPD_end_declare_target:
10654 case OMPD_declare_reduction:
10655 case OMPD_declare_mapper:
10656 case OMPD_taskloop:
10657 case OMPD_taskloop_simd:
10658 case OMPD_master_taskloop:
10659 case OMPD_master_taskloop_simd:
10660 case OMPD_parallel_master_taskloop:
10661 case OMPD_parallel_master_taskloop_simd:
10662 case OMPD_requires:
10663 case OMPD_unknown:
10664 default:
10665 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10666 }
10667 return;
10668 }
10669
10670 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10671 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10672 return;
10673
10674 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10675 return;
10676 }
10677
10678 // If this is a lambda function, look into its body.
10679 if (const auto *L = dyn_cast<LambdaExpr>(S))
10680 S = L->getBody();
10681
10682 // Keep looking for target regions recursively.
10683 for (const Stmt *II : S->children())
10684 scanForTargetRegionsFunctions(II, ParentName);
10685 }
10686
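// Illustrative use (assumed user source, not from this file): given
//   #pragma omp declare target device_type(nohost)
//   void gpu_only_fn();
// isAssumedToBeNotEmitted() returns true for gpu_only_fn() in the host pass,
// so it is skipped there; device_type(host) symmetrically suppresses the
// device-side emission.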
10687 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10688 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10689 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10690 if (!DevTy)
10691 return false;
10692 // Do not emit device_type(nohost) functions for the host.
10693 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10694 return true;
10695 // Do not emit device_type(host) functions for the device.
10696 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10697 return true;
10698 return false;
10699 }
10700
10701 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10702 // If emitting code for the host, we do not process FD here. Instead we do
10703 // the normal code generation.
10704 if (!CGM.getLangOpts().OpenMPIsDevice) {
10705 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10706 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10707 CGM.getLangOpts().OpenMPIsDevice))
10708 return true;
10709 return false;
10710 }
10711
10712 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10713 // Try to detect target regions in the function.
10714 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10715 StringRef Name = CGM.getMangledName(GD);
10716 scanForTargetRegionsFunctions(FD->getBody(), Name);
10717 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10718 CGM.getLangOpts().OpenMPIsDevice))
10719 return true;
10720 }
10721
10722 // Do not emit the function if it is not marked as declare target.
10723 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10724 AlreadyEmittedTargetDecls.count(VD) == 0;
10725 }
10726
10727 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10728 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10729 CGM.getLangOpts().OpenMPIsDevice))
10730 return true;
10731
10732 if (!CGM.getLangOpts().OpenMPIsDevice)
10733 return false;
10734
10735 // Check if there are Ctors/Dtors in this declaration and look for target
10736 // regions in it. We use the complete variant to produce the kernel name
10737 // mangling.
10738 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10739 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10740 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10741 StringRef ParentName =
10742 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10743 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10744 }
10745 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10746 StringRef ParentName =
10747 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10748 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10749 }
10750 }
10751
10752 // Do not emit the variable if it is not marked as declare target.
10753 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10754 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10755 cast<VarDecl>(GD.getDecl()));
10756 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10757 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10758 HasRequiresUnifiedSharedMemory)) {
10759 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10760 return true;
10761 }
10762 return false;
10763 }
10764
10765 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10766 llvm::Constant *Addr) {
10767 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10768 !CGM.getLangOpts().OpenMPIsDevice)
10769 return;
10770
10771 // If we have host/nohost variables, they do not need to be registered.
10772 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10773 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10774 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10775 return;
10776
10777 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10778 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10779 if (!Res) {
10780 if (CGM.getLangOpts().OpenMPIsDevice) {
10781 // Register non-target variables being emitted in device code (debug info
10782 // may cause this).
10783 StringRef VarName = CGM.getMangledName(VD);
10784 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10785 }
10786 return;
10787 }
10788 // Register declare target variables.
10789 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10790 StringRef VarName;
10791 CharUnits VarSize;
10792 llvm::GlobalValue::LinkageTypes Linkage;
10793
10794 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10795 !HasRequiresUnifiedSharedMemory) {
10796 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10797 VarName = CGM.getMangledName(VD);
10798 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10799 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10800 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10801 } else {
10802 VarSize = CharUnits::Zero();
10803 }
10804 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10805 // Temporary solution to prevent optimization of the internal variables.
10806 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10807 // Do not create a "ref-variable" if the original is not also available
10808 // on the host.
10809 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10810 return;
10811 std::string RefName = getName({VarName, "ref"});
10812 if (!CGM.GetGlobalValue(RefName)) {
10813 llvm::Constant *AddrRef =
10814 getOrCreateInternalVariable(Addr->getType(), RefName);
10815 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10816 GVAddrRef->setConstant(/*Val=*/true);
10817 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10818 GVAddrRef->setInitializer(Addr);
10819 CGM.addCompilerUsedGlobal(GVAddrRef);
10820 }
10821 }
10822 } else {
10823 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10824 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10825 HasRequiresUnifiedSharedMemory)) &&
10826 "Declare target attribute must link or to with unified memory.");
10827 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10828 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10829 else
10830 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10831
10832 if (CGM.getLangOpts().OpenMPIsDevice) {
10833 VarName = Addr->getName();
10834 Addr = nullptr;
10835 } else {
10836 VarName = getAddrOfDeclareTargetVar(VD).getName();
10837 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10838 }
10839 VarSize = CGM.getPointerSize();
10840 Linkage = llvm::GlobalValue::WeakAnyLinkage;
10841 }
10842
10843 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10844 VarName, Addr, VarSize, Flags, Linkage);
10845 }
10846
10847 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10848 if (isa<FunctionDecl>(GD.getDecl()) ||
10849 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10850 return emitTargetFunctions(GD);
10851
10852 return emitTargetGlobalVariable(GD);
10853 }
10854
10855 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10856 for (const VarDecl *VD : DeferredGlobalVariables) {
10857 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10858 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10859 if (!Res)
10860 continue;
10861 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10862 !HasRequiresUnifiedSharedMemory) {
10863 CGM.EmitGlobal(VD);
10864 } else {
10865 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10866 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10867 HasRequiresUnifiedSharedMemory)) &&
10868 "Expected link clause or to clause with unified memory.");
10869 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10870 }
10871 }
10872 }
10873
10874 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10875 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10876 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10877 "Expected target-based directive.");
10878 }
10879
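// For example, after
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// processRequiresDirective() records SequentiallyConsistent, which
// getDefaultMemoryOrdering() below reports to atomic codegen.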
10880 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10881 for (const OMPClause *Clause : D->clauselists()) {
10882 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10883 HasRequiresUnifiedSharedMemory = true;
10884 } else if (const auto *AC =
10885 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10886 switch (AC->getAtomicDefaultMemOrderKind()) {
10887 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10888 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10889 break;
10890 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10891 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10892 break;
10893 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10894 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10895 break;
10896 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10897 break;
10898 }
10899 }
10900 }
10901 }
10902
10903 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10904 return RequiresAtomicOrdering;
10905 }
10906
10907 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10908 LangAS &AS) {
10909 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10910 return false;
10911 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10912 switch (A->getAllocatorType()) {
10913 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10914 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10915 // Not supported, fallback to the default mem space.
10916 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10917 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10918 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10919 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10920 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10921 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10922 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10923 AS = LangAS::Default;
10924 return true;
10925 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10926 llvm_unreachable("Expected predefined allocator for the variables with the "
10927 "static storage.");
10928 }
10929 return false;
10930 }
10931
10932 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10933 return HasRequiresUnifiedSharedMemory;
10934 }
10935
10936 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10937 CodeGenModule &CGM)
10938 : CGM(CGM) {
10939 if (CGM.getLangOpts().OpenMPIsDevice) {
10940 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10941 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10942 }
10943 }
10944
10945 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10946 if (CGM.getLangOpts().OpenMPIsDevice)
10947 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10948 }
10949
10950 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10951 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10952 return true;
10953
10954 const auto *D = cast<FunctionDecl>(GD.getDecl());
10955 // Do not emit the function if it is marked as declare target, as it was
10956 // already emitted.
10957 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10958 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10959 if (auto *F = dyn_cast_or_null<llvm::Function>(
10960 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10961 return !F->isDeclaration();
10962 return false;
10963 }
10964 return true;
10965 }
10966
10967 return !AlreadyEmittedTargetDecls.insert(D).second;
10968 }
10969
10970 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10971 // If we don't have entries or if we are emitting code for the device, we
10972 // don't need to do anything.
10973 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10974 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10975 (OffloadEntriesInfoManager.empty() &&
10976 !HasEmittedDeclareTargetRegion &&
10977 !HasEmittedTargetRegion))
10978 return nullptr;
10979
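// A sketch of the registration function generated below (the actual symbol
// name is produced by getName({"omp_offloading", "requires_reg"})):
//   void omp_offloading_requires_reg() {
//     __tgt_register_requires(Flags); // e.g. OMP_REQ_UNIFIED_SHARED_MEMORY
//   }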
10980 // Create and register the function that handles the requires directives.
10981 ASTContext &C = CGM.getContext();
10982
10983 llvm::Function *RequiresRegFn;
10984 {
10985 CodeGenFunction CGF(CGM);
10986 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10987 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10988 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10989 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10990 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10991 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10992 // TODO: check for other requires clauses.
10993 // The requires directive takes effect only when a target region is
10994 // present in the compilation unit. Otherwise it is ignored and not
10995 // passed to the runtime. This prevents the runtime from throwing an
10996 // error for mismatched requires clauses across compilation units that
10997 // don't contain at least one target region.
10998 assert((HasEmittedTargetRegion ||
10999 HasEmittedDeclareTargetRegion ||
11000 !OffloadEntriesInfoManager.empty()) &&
11001 "Target or declare target region expected.");
11002 if (HasRequiresUnifiedSharedMemory)
11003 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11004 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11005 CGM.getModule(), OMPRTL___tgt_register_requires),
11006 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11007 CGF.FinishFunction();
11008 }
11009 return RequiresRegFn;
11010 }
11011
11012 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11013 const OMPExecutableDirective &D,
11014 SourceLocation Loc,
11015 llvm::Function *OutlinedFn,
11016 ArrayRef<llvm::Value *> CapturedVars) {
11017 if (!CGF.HaveInsertPoint())
11018 return;
11019
11020 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11021 CodeGenFunction::RunCleanupsScope Scope(CGF);
11022
11023 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11024 llvm::Value *Args[] = {
11025 RTLoc,
11026 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11027 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11028 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11029 RealArgs.append(std::begin(Args), std::end(Args));
11030 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11031
11032 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11033 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11034 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11035 }
11036
11037 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11038 const Expr *NumTeams,
11039 const Expr *ThreadLimit,
11040 SourceLocation Loc) {
11041 if (!CGF.HaveInsertPoint())
11042 return;
11043
11044 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11045
11046 llvm::Value *NumTeamsVal =
11047 NumTeams
11048 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11049 CGF.CGM.Int32Ty, /* isSigned = */ true)
11050 : CGF.Builder.getInt32(0);
11051
11052 llvm::Value *ThreadLimitVal =
11053 ThreadLimit
11054 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11055 CGF.CGM.Int32Ty, /* isSigned = */ true)
11056 : CGF.Builder.getInt32(0);
11057
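// For example, '#pragma omp teams num_teams(4) thread_limit(64)' produces
// __kmpc_push_num_teams(&loc, gtid, 4, 64); an absent clause is encoded as
// 0 so the runtime picks its default.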
11058 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11059 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11060 ThreadLimitVal};
11061 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11062 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11063 PushNumTeamsArgs);
11064 }
11065
11066 void CGOpenMPRuntime::emitTargetDataCalls(
11067 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11068 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11069 if (!CGF.HaveInsertPoint())
11070 return;
11071
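// The overall shape of the code emitted for a '#pragma omp target data'
// region is, as a sketch:
//   __tgt_target_data_begin_mapper(...);
//   <body of the target data region>
//   __tgt_target_data_end_mapper(...);
// with the begin/end calls guarded by the 'if' clause when one is present.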
11072 // Action used to replace the default codegen action and turn privatization
11073 // off.
11074 PrePostActionTy NoPrivAction;
11075
11076 // Generate the code for the opening of the data environment. Capture all the
11077 // arguments of the runtime call by reference because they are used in the
11078 // closing of the region.
11079 auto &&BeginThenGen = [this, &D, Device, &Info,
11080 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11081 // Fill up the arrays with all the mapped variables.
11082 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11083
11084 // Get map clause information.
11085 MappableExprsHandler MEHandler(D, CGF);
11086 MEHandler.generateAllInfo(CombinedInfo);
11087
11088 // Fill up the arrays and create the arguments.
11089 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11090 /*IsNonContiguous=*/true);
11091
11092 llvm::Value *BasePointersArrayArg = nullptr;
11093 llvm::Value *PointersArrayArg = nullptr;
11094 llvm::Value *SizesArrayArg = nullptr;
11095 llvm::Value *MapTypesArrayArg = nullptr;
11096 llvm::Value *MapNamesArrayArg = nullptr;
11097 llvm::Value *MappersArrayArg = nullptr;
11098 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11099 SizesArrayArg, MapTypesArrayArg,
11100 MapNamesArrayArg, MappersArrayArg, Info);
11101
11102 // Emit device ID if any.
11103 llvm::Value *DeviceID = nullptr;
11104 if (Device) {
11105 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11106 CGF.Int64Ty, /*isSigned=*/true);
11107 } else {
11108 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11109 }
11110
11111 // Emit the number of elements in the offloading arrays.
11112 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11114 // Source location for the ident struct
11115 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11116
11117 llvm::Value *OffloadingArgs[] = {RTLoc,
11118 DeviceID,
11119 PointerNum,
11120 BasePointersArrayArg,
11121 PointersArrayArg,
11122 SizesArrayArg,
11123 MapTypesArrayArg,
11124 MapNamesArrayArg,
11125 MappersArrayArg};
11126 CGF.EmitRuntimeCall(
11127 OMPBuilder.getOrCreateRuntimeFunction(
11128 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11129 OffloadingArgs);
11130
11131 // If device pointer privatization is required, emit the body of the region
11132 // here. It will have to be duplicated: with and without privatization.
11133 if (!Info.CaptureDeviceAddrMap.empty())
11134 CodeGen(CGF);
11135 };
11136
11137 // Generate code for the closing of the data region.
11138 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11139 PrePostActionTy &) {
11140 assert(Info.isValid() && "Invalid data environment closing arguments.");
11141
11142 llvm::Value *BasePointersArrayArg = nullptr;
11143 llvm::Value *PointersArrayArg = nullptr;
11144 llvm::Value *SizesArrayArg = nullptr;
11145 llvm::Value *MapTypesArrayArg = nullptr;
11146 llvm::Value *MapNamesArrayArg = nullptr;
11147 llvm::Value *MappersArrayArg = nullptr;
11148 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11149 SizesArrayArg, MapTypesArrayArg,
11150 MapNamesArrayArg, MappersArrayArg, Info,
11151 {/*ForEndCall=*/true});
11152
11153 // Emit device ID if any.
11154 llvm::Value *DeviceID = nullptr;
11155 if (Device) {
11156 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11157 CGF.Int64Ty, /*isSigned=*/true);
11158 } else {
11159 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11160 }
11161
11162 // Emit the number of elements in the offloading arrays.
11163 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11164
11165 // Source location for the ident struct
11166 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11167
11168 llvm::Value *OffloadingArgs[] = {RTLoc,
11169 DeviceID,
11170 PointerNum,
11171 BasePointersArrayArg,
11172 PointersArrayArg,
11173 SizesArrayArg,
11174 MapTypesArrayArg,
11175 MapNamesArrayArg,
11176 MappersArrayArg};
11177 CGF.EmitRuntimeCall(
11178 OMPBuilder.getOrCreateRuntimeFunction(
11179 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11180 OffloadingArgs);
11181 };
11182
11183 // If we need device pointer privatization, we need to emit the body of the
11184 // region with no privatization in the 'else' branch of the conditional.
11185 // Otherwise, we don't have to do anything.
11186 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11187 PrePostActionTy &) {
11188 if (!Info.CaptureDeviceAddrMap.empty()) {
11189 CodeGen.setAction(NoPrivAction);
11190 CodeGen(CGF);
11191 }
11192 };
11193
11194 // We don't have to do anything to close the region if the if clause evaluates
11195 // to false.
11196 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11197
11198 if (IfCond) {
11199 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11200 } else {
11201 RegionCodeGenTy RCG(BeginThenGen);
11202 RCG(CGF);
11203 }
11204
11205 // If we don't require privatization of device pointers, we emit the body in
11206 // between the runtime calls. This avoids duplicating the body code.
11207 if (Info.CaptureDeviceAddrMap.empty()) {
11208 CodeGen.setAction(NoPrivAction);
11209 CodeGen(CGF);
11210 }
11211
11212 if (IfCond) {
11213 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11214 } else {
11215 RegionCodeGenTy RCG(EndThenGen);
11216 RCG(CGF);
11217 }
11218 }
11219
11220 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11221 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11222 const Expr *Device) {
11223 if (!CGF.HaveInsertPoint())
11224 return;
11225
11226 assert((isa<OMPTargetEnterDataDirective>(D) ||
11227 isa<OMPTargetExitDataDirective>(D) ||
11228 isa<OMPTargetUpdateDirective>(D)) &&
11229 "Expecting either target enter, exit data, or update directives.");
11230
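// For illustration, '#pragma omp target update to(x) nowait' lowers to a
// __tgt_target_data_update_nowait_mapper() call, while
// '#pragma omp target enter data map(to: x)' lowers to
// __tgt_target_data_begin_mapper(); the switch below selects the entry point.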
11231 CodeGenFunction::OMPTargetDataInfo InputInfo;
11232 llvm::Value *MapTypesArray = nullptr;
11233 llvm::Value *MapNamesArray = nullptr;
11234 // Generate the code for the opening of the data environment.
11235 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11236 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11237 // Emit device ID if any.
11238 llvm::Value *DeviceID = nullptr;
11239 if (Device) {
11240 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11241 CGF.Int64Ty, /*isSigned=*/true);
11242 } else {
11243 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11244 }
11245
11246 // Emit the number of elements in the offloading arrays.
11247 llvm::Constant *PointerNum =
11248 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11249
11250 // Source location for the ident struct
11251 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11252
11253 llvm::Value *OffloadingArgs[] = {RTLoc,
11254 DeviceID,
11255 PointerNum,
11256 InputInfo.BasePointersArray.getPointer(),
11257 InputInfo.PointersArray.getPointer(),
11258 InputInfo.SizesArray.getPointer(),
11259 MapTypesArray,
11260 MapNamesArray,
11261 InputInfo.MappersArray.getPointer()};
11262
11263 // Select the right runtime function call for each standalone
11264 // directive.
11265 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11266 RuntimeFunction RTLFn;
11267 switch (D.getDirectiveKind()) {
11268 case OMPD_target_enter_data:
11269 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11270 : OMPRTL___tgt_target_data_begin_mapper;
11271 break;
11272 case OMPD_target_exit_data:
11273 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11274 : OMPRTL___tgt_target_data_end_mapper;
11275 break;
11276 case OMPD_target_update:
11277 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11278 : OMPRTL___tgt_target_data_update_mapper;
11279 break;
11280 case OMPD_parallel:
11281 case OMPD_for:
11282 case OMPD_parallel_for:
11283 case OMPD_parallel_master:
11284 case OMPD_parallel_sections:
11285 case OMPD_for_simd:
11286 case OMPD_parallel_for_simd:
11287 case OMPD_cancel:
11288 case OMPD_cancellation_point:
11289 case OMPD_ordered:
11290 case OMPD_threadprivate:
11291 case OMPD_allocate:
11292 case OMPD_task:
11293 case OMPD_simd:
11294 case OMPD_tile:
11295 case OMPD_unroll:
11296 case OMPD_sections:
11297 case OMPD_section:
11298 case OMPD_single:
11299 case OMPD_master:
11300 case OMPD_critical:
11301 case OMPD_taskyield:
11302 case OMPD_barrier:
11303 case OMPD_taskwait:
11304 case OMPD_taskgroup:
11305 case OMPD_atomic:
11306 case OMPD_flush:
11307 case OMPD_depobj:
11308 case OMPD_scan:
11309 case OMPD_teams:
11310 case OMPD_target_data:
11311 case OMPD_distribute:
11312 case OMPD_distribute_simd:
11313 case OMPD_distribute_parallel_for:
11314 case OMPD_distribute_parallel_for_simd:
11315 case OMPD_teams_distribute:
11316 case OMPD_teams_distribute_simd:
11317 case OMPD_teams_distribute_parallel_for:
11318 case OMPD_teams_distribute_parallel_for_simd:
11319 case OMPD_declare_simd:
11320 case OMPD_declare_variant:
11321 case OMPD_begin_declare_variant:
11322 case OMPD_end_declare_variant:
11323 case OMPD_declare_target:
11324 case OMPD_end_declare_target:
11325 case OMPD_declare_reduction:
11326 case OMPD_declare_mapper:
11327 case OMPD_taskloop:
11328 case OMPD_taskloop_simd:
11329 case OMPD_master_taskloop:
11330 case OMPD_master_taskloop_simd:
11331 case OMPD_parallel_master_taskloop:
11332 case OMPD_parallel_master_taskloop_simd:
11333 case OMPD_target:
11334 case OMPD_target_simd:
11335 case OMPD_target_teams_distribute:
11336 case OMPD_target_teams_distribute_simd:
11337 case OMPD_target_teams_distribute_parallel_for:
11338 case OMPD_target_teams_distribute_parallel_for_simd:
11339 case OMPD_target_teams:
11340 case OMPD_target_parallel:
11341 case OMPD_target_parallel_for:
11342 case OMPD_target_parallel_for_simd:
11343 case OMPD_requires:
11344 case OMPD_unknown:
11345 default:
11346 llvm_unreachable("Unexpected standalone target data directive.");
11347 break;
11348 }
11349 CGF.EmitRuntimeCall(
11350 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11351 OffloadingArgs);
11352 };
11353
11354 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11355 &MapNamesArray](CodeGenFunction &CGF,
11356 PrePostActionTy &) {
11357 // Fill up the arrays with all the mapped variables.
11358 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11359
11360 // Get map clause information.
11361 MappableExprsHandler MEHandler(D, CGF);
11362 MEHandler.generateAllInfo(CombinedInfo);
11363
11364 TargetDataInfo Info;
11365 // Fill up the arrays and create the arguments.
11366 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11367 /*IsNonContiguous=*/true);
11368 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11369 D.hasClausesOfKind<OMPNowaitClause>();
11370 emitOffloadingArraysArgument(
11371 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11372 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11373 {/*ForEndTask=*/false});
11374 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11375 InputInfo.BasePointersArray =
11376 Address(Info.BasePointersArray, CGM.getPointerAlign());
11377 InputInfo.PointersArray =
11378 Address(Info.PointersArray, CGM.getPointerAlign());
11379 InputInfo.SizesArray =
11380 Address(Info.SizesArray, CGM.getPointerAlign());
11381 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11382 MapTypesArray = Info.MapTypesArray;
11383 MapNamesArray = Info.MapNamesArray;
11384 if (RequiresOuterTask)
11385 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11386 else
11387 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11388 };
11389
11390 if (IfCond) {
11391 emitIfClause(CGF, IfCond, TargetThenGen,
11392 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11393 } else {
11394 RegionCodeGenTy ThenRCG(TargetThenGen);
11395 ThenRCG(CGF);
11396 }
11397 }
11398
11399 namespace {
11400 /// Kind of parameter in a function with 'declare simd' directive.
11401 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11402 /// Attribute set of the parameter.
11403 struct ParamAttrTy {
11404 ParamKindTy Kind = Vector;
11405 llvm::APSInt StrideOrArg;
11406 llvm::APSInt Alignment;
11407 };
11408 } // namespace
11409
11410 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11411 ArrayRef<ParamAttrTy> ParamAttrs) {
11412 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11413 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
11414 // argument of that clause. The VLEN value must be a power of 2.
11415 // Otherwise the notion of the function's "characteristic data type" (CDT)
11416 // is used to compute the vector length.
11417 // CDT is defined in the following order:
11418 // a) For non-void function, the CDT is the return type.
11419 // b) If the function has any non-uniform, non-linear parameters, then the
11420 // CDT is the type of the first such parameter.
11421 // c) If the CDT determined by a) or b) above is a struct, union, or class
11422 // type which is passed by value (except for the type that maps to the
11423 // built-in complex data type), the characteristic data type is int.
11424 // d) If none of the above three cases is applicable, the CDT is int.
11425 // The VLEN is then determined based on the CDT and the size of vector
11426 // register of that ISA for which current vector version is generated. The
11427 // VLEN is computed using the formula below:
11428 // VLEN = sizeof(vector_register) / sizeof(CDT),
11429 // where the vector register size is specified in section 3.2.1 "Registers
11430 // and the Stack Frame" of the original AMD64 ABI document.
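// For example, with CDT == double (64 bits) and a 256-bit (AVX2) vector
// register, VLEN = 256 / 64 = 4.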
11431 QualType RetType = FD->getReturnType();
11432 if (RetType.isNull())
11433 return 0;
11434 ASTContext &C = FD->getASTContext();
11435 QualType CDT;
11436 if (!RetType.isNull() && !RetType->isVoidType()) {
11437 CDT = RetType;
11438 } else {
11439 unsigned Offset = 0;
11440 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11441 if (ParamAttrs[Offset].Kind == Vector)
11442 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11443 ++Offset;
11444 }
11445 if (CDT.isNull()) {
11446 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11447 if (ParamAttrs[I + Offset].Kind == Vector) {
11448 CDT = FD->getParamDecl(I)->getType();
11449 break;
11450 }
11451 }
11452 }
11453 }
11454 if (CDT.isNull())
11455 CDT = C.IntTy;
11456 CDT = CDT->getCanonicalTypeUnqualified();
11457 if (CDT->isRecordType() || CDT->isUnionType())
11458 CDT = C.IntTy;
11459 return C.getTypeSize(CDT);
11460 }
11461
11462 static void
11463 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11464 const llvm::APSInt &VLENVal,
11465 ArrayRef<ParamAttrTy> ParamAttrs,
11466 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11467 struct ISADataTy {
11468 char ISA;
11469 unsigned VecRegSize;
11470 };
11471 ISADataTy ISAData[] = {
11472     {'b', 128}, // SSE
11473     {'c', 256}, // AVX
11474     {'d', 256}, // AVX2
11475     {'e', 512}, // AVX512
11476 };
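// Illustrative mangling (assumed C-linkage source, not from this file): for
//   #pragma omp declare simd simdlen(4) notinbranch
//   float foo(float x);
// the loop below attaches the attributes "_ZGVbN4v_foo" (SSE),
// "_ZGVcN4v_foo" (AVX), "_ZGVdN4v_foo" (AVX2) and "_ZGVeN4v_foo" (AVX512).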
11485 llvm::SmallVector<char, 2> Masked;
11486 switch (State) {
11487 case OMPDeclareSimdDeclAttr::BS_Undefined:
11488 Masked.push_back('N');
11489 Masked.push_back('M');
11490 break;
11491 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11492 Masked.push_back('N');
11493 break;
11494 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11495 Masked.push_back('M');
11496 break;
11497 }
11498 for (char Mask : Masked) {
11499 for (const ISADataTy &Data : ISAData) {
11500 SmallString<256> Buffer;
11501 llvm::raw_svector_ostream Out(Buffer);
11502 Out << "_ZGV" << Data.ISA << Mask;
11503 if (!VLENVal) {
11504 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11505 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11506 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11507 } else {
11508 Out << VLENVal;
11509 }
11510 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11511 switch (ParamAttr.Kind) {
11512 case LinearWithVarStride:
11513 Out << 's' << ParamAttr.StrideOrArg;
11514 break;
11515 case Linear:
11516 Out << 'l';
11517 if (ParamAttr.StrideOrArg != 1)
11518 Out << ParamAttr.StrideOrArg;
11519 break;
11520 case Uniform:
11521 Out << 'u';
11522 break;
11523 case Vector:
11524 Out << 'v';
11525 break;
11526 }
11527 if (!!ParamAttr.Alignment)
11528 Out << 'a' << ParamAttr.Alignment;
11529 }
11530 Out << '_' << Fn->getName();
11531 Fn->addFnAttr(Out.str());
11532 }
11533 }
11534 }
11535
11536 // These are the functions needed to mangle the names of the vector
11537 // functions generated by the compiler, according to the rules defined
11538 // in the "Vector Function ABI specifications for AArch64", available
11539 // at
11540 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11541
11542 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11543 ///
11544 /// TODO: Need to implement the behavior for reference marked with a
11545 /// var or no linear modifiers (1.b in the section). For this, we
11546 /// need to extend ParamKindTy to support the linear modifiers.
11547 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11548 QT = QT.getCanonicalType();
11549
11550 if (QT->isVoidType())
11551 return false;
11552
11553 if (Kind == ParamKindTy::Uniform)
11554 return false;
11555
11556 if (Kind == ParamKindTy::Linear)
11557 return false;
11558
11559 // TODO: Handle linear references with modifiers
11560
11561 if (Kind == ParamKindTy::LinearWithVarStride)
11562 return false;
11563
11564 return true;
11565 }
11566
11567 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11568 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11569 QT = QT.getCanonicalType();
11570 unsigned Size = C.getTypeSize(QT);
11571
11572 // Only scalar and complex types at most 16 bytes wide set PBV to true.
11573 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11574 return false;
11575
11576 if (QT->isFloatingType())
11577 return true;
11578
11579 if (QT->isIntegerType())
11580 return true;
11581
11582 if (QT->isPointerType())
11583 return true;
11584
11585 // TODO: Add support for complex types (section 3.1.2, item 2).
11586
11587 return false;
11588 }
11589
11590 /// Computes the lane size (LS) of a return type or of an input parameter,
11591 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11592 /// TODO: Add support for references, section 3.2.1, item 1.
11593 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11594 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11595 QualType PTy = QT.getCanonicalType()->getPointeeType();
11596 if (getAArch64PBV(PTy, C))
11597 return C.getTypeSize(PTy);
11598 }
11599 if (getAArch64PBV(QT, C))
11600 return C.getTypeSize(QT);
11601
11602 return C.getTypeSize(C.getUIntPtrType());
11603 }
11604
11605 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11606 // signature of the scalar function, as defined in 3.2.2 of the
11607 // AAVFABI.
11608 static std::tuple<unsigned, unsigned, bool>
11609 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11610 QualType RetType = FD->getReturnType().getCanonicalType();
11611
11612 ASTContext &C = FD->getASTContext();
11613
11614 bool OutputBecomesInput = false;
11615
11616 llvm::SmallVector<unsigned, 8> Sizes;
11617 if (!RetType->isVoidType()) {
11618 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11619 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11620 OutputBecomesInput = true;
11621 }
11622 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11623 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11624 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11625 }
11626
11627 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11628 // The LS of a function parameter / return value can only be a power
11629 // of 2, starting from 8 bits, up to 128.
11630 assert(std::all_of(Sizes.begin(), Sizes.end(),
11631 [](unsigned Size) {
11632 return Size == 8 || Size == 16 || Size == 32 ||
11633 Size == 64 || Size == 128;
11634 }) &&
11635 "Invalid size");
11636
11637 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11638 *std::max_element(std::begin(Sizes), std::end(Sizes)),
11639 OutputBecomesInput);
11640 }
11641
11642 /// Mangle the parameter part of the vector function name according to
11643 /// their OpenMP classification. The mangling function is defined in
11644 /// section 3.5 of the AAVFABI.
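/// For example, a parameter sequence marked (uniform, linear with step 2,
/// vector, vector with aligned(16)) mangles to "ul2vva16".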
11645 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11646 SmallString<256> Buffer;
11647 llvm::raw_svector_ostream Out(Buffer);
11648 for (const auto &ParamAttr : ParamAttrs) {
11649 switch (ParamAttr.Kind) {
11650 case LinearWithVarStride:
11651 Out << "ls" << ParamAttr.StrideOrArg;
11652 break;
11653 case Linear:
11654 Out << 'l';
11655 // Don't print the step value if it is not present or if it is
11656 // equal to 1.
11657 if (ParamAttr.StrideOrArg != 1)
11658 Out << ParamAttr.StrideOrArg;
11659 break;
11660 case Uniform:
11661 Out << 'u';
11662 break;
11663 case Vector:
11664 Out << 'v';
11665 break;
11666 }
11667
11668 if (!!ParamAttr.Alignment)
11669 Out << 'a' << ParamAttr.Alignment;
11670 }
11671
11672 return std::string(Out.str());
11673 }
11674
11675 // Function used to add the attribute. The parameter `VLEN` is
11676 // templated to allow the use of "x" when targeting scalable functions
11677 // for SVE.
11678 template <typename T>
11679 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11680 char ISA, StringRef ParSeq,
11681 StringRef MangledName, bool OutputBecomesInput,
11682 llvm::Function *Fn) {
11683 SmallString<256> Buffer;
11684 llvm::raw_svector_ostream Out(Buffer);
11685 Out << Prefix << ISA << LMask << VLEN;
11686 if (OutputBecomesInput)
11687 Out << "v";
11688 Out << ParSeq << "_" << MangledName;
11689 Fn->addFnAttr(Out.str());
11690 }
11691
11692 // Helper function to generate the Advanced SIMD names depending on
11693 // the value of the NDS when simdlen is not present.
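// For example, NDS == 32 (e.g. a float-only signature) yields both the
// 2-lane (64-bit) and 4-lane (128-bit) Advanced SIMD variants below.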
11694 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11695 StringRef Prefix, char ISA,
11696 StringRef ParSeq, StringRef MangledName,
11697 bool OutputBecomesInput,
11698 llvm::Function *Fn) {
11699 switch (NDS) {
11700 case 8:
11701 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11702 OutputBecomesInput, Fn);
11703 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11704 OutputBecomesInput, Fn);
11705 break;
11706 case 16:
11707 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11708 OutputBecomesInput, Fn);
11709 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11710 OutputBecomesInput, Fn);
11711 break;
11712 case 32:
11713 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11714 OutputBecomesInput, Fn);
11715 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11716 OutputBecomesInput, Fn);
11717 break;
11718 case 64:
11719 case 128:
11720 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11721 OutputBecomesInput, Fn);
11722 break;
11723 default:
11724 llvm_unreachable("Scalar type is too wide.");
11725 }
11726 }
11727
11728 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11729 static void emitAArch64DeclareSimdFunction(
11730 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11731 ArrayRef<ParamAttrTy> ParamAttrs,
11732 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11733 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11734
11735 // Get basic data for building the vector signature.
11736 const auto Data = getNDSWDS(FD, ParamAttrs);
11737 const unsigned NDS = std::get<0>(Data);
11738 const unsigned WDS = std::get<1>(Data);
11739 const bool OutputBecomesInput = std::get<2>(Data);
11740
11741 // Check the values provided via `simdlen` by the user.
11742 // 1. A `simdlen(1)` doesn't produce vector signatures.
11743 if (UserVLEN == 1) {
11744 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11745 DiagnosticsEngine::Warning,
11746 "The clause simdlen(1) has no effect when targeting aarch64.");
11747 CGM.getDiags().Report(SLoc, DiagID);
11748 return;
11749 }
11750
11751 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11752 // Advanced SIMD output.
11753 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11754 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11755 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11756 "power of 2 when targeting Advanced SIMD.");
11757 CGM.getDiags().Report(SLoc, DiagID);
11758 return;
11759 }
11760
11761 // 3. Section 3.4.1: the SVE fixed length must obey the architectural
11762 // limits.
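// For example, with WDS == 64 the accepted simdlen values are the even
// numbers 2, 4, ..., 32, so that simdlen * WDS is a multiple of 128 and at
// most 2048.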
11763 if (ISA == 's' && UserVLEN != 0) {
11764 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11765 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11766 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11767 "lanes in the architectural constraints "
11768 "for SVE (min is 128-bit, max is "
11769 "2048-bit, by steps of 128-bit)");
11770 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11771 return;
11772 }
11773 }
11774
11775 // Sort out parameter sequence.
11776 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11777 StringRef Prefix = "_ZGV";
11778 // Generate simdlen from user input (if any).
11779 if (UserVLEN) {
11780 if (ISA == 's') {
11781 // SVE generates only a masked function.
11782 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11783 OutputBecomesInput, Fn);
11784 } else {
11785 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11786 // Advanced SIMD generates one or two functions, depending on
11787 // the `[not]inbranch` clause.
11788 switch (State) {
11789 case OMPDeclareSimdDeclAttr::BS_Undefined:
11790 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11791 OutputBecomesInput, Fn);
11792 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11793 OutputBecomesInput, Fn);
11794 break;
11795 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11796 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11797 OutputBecomesInput, Fn);
11798 break;
11799 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11800 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11801 OutputBecomesInput, Fn);
11802 break;
11803 }
11804 }
11805 } else {
11806 // If no user simdlen is provided, follow the AAVFABI rules for
11807 // generating the vector length.
11808 if (ISA == 's') {
11809 // SVE, section 3.4.1, item 1.
11810 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11811 OutputBecomesInput, Fn);
11812 } else {
11813 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11814 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11815 // two vector names depending on the use of the clause
11816 // `[not]inbranch`.
11817 switch (State) {
11818 case OMPDeclareSimdDeclAttr::BS_Undefined:
11819 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11820 OutputBecomesInput, Fn);
11821 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11822 OutputBecomesInput, Fn);
11823 break;
11824 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11825 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11826 OutputBecomesInput, Fn);
11827 break;
11828 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11829 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11830 OutputBecomesInput, Fn);
11831 break;
11832 }
11833 }
11834 }
11835 }
11836
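/// Emit the vector variants mandated by '#pragma omp declare simd' for \p FD.
/// Walks all redeclarations of \p FD, collects the uniform/aligned/linear
/// parameter attributes and the optional simdlen value from each attribute,
/// and dispatches to the x86 or AArch64 mangling scheme based on the target
/// triple.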
11837 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11838 llvm::Function *Fn) {
11839 ASTContext &C = CGM.getContext();
11840 FD = FD->getMostRecentDecl();
11841 // Map params to their positions in function decl.
11842 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11843 if (isa<CXXMethodDecl>(FD))
11844 ParamPositions.try_emplace(FD, 0);
11845 unsigned ParamPos = ParamPositions.size();
11846 for (const ParmVarDecl *P : FD->parameters()) {
11847 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11848 ++ParamPos;
11849 }
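  // Walk the redeclaration chain; each redeclaration may carry its own
  // 'declare simd' attributes.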
11850 while (FD) {
11851 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11852 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11853 // Mark uniform parameters.
11854 for (const Expr *E : Attr->uniforms()) {
11855 E = E->IgnoreParenImpCasts();
11856 unsigned Pos;
11857 if (isa<CXXThisExpr>(E)) {
11858 Pos = ParamPositions[FD];
11859 } else {
11860 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11861 ->getCanonicalDecl();
11862 Pos = ParamPositions[PVD];
11863 }
11864 ParamAttrs[Pos].Kind = Uniform;
11865 }
11866 // Get alignment info.
11867 auto NI = Attr->alignments_begin();
11868 for (const Expr *E : Attr->aligneds()) {
11869 E = E->IgnoreParenImpCasts();
11870 unsigned Pos;
11871 QualType ParmTy;
11872 if (isa<CXXThisExpr>(E)) {
11873 Pos = ParamPositions[FD];
11874 ParmTy = E->getType();
11875 } else {
11876 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11877 ->getCanonicalDecl();
11878 Pos = ParamPositions[PVD];
11879 ParmTy = PVD->getType();
11880 }
11881 ParamAttrs[Pos].Alignment =
11882 (*NI)
11883 ? (*NI)->EvaluateKnownConstInt(C)
11884 : llvm::APSInt::getUnsigned(
11885 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11886 .getQuantity());
11887 ++NI;
11888 }
11889 // Mark linear parameters.
11890 auto SI = Attr->steps_begin();
11891 auto MI = Attr->modifiers_begin();
11892 for (const Expr *E : Attr->linears()) {
11893 E = E->IgnoreParenImpCasts();
11894 unsigned Pos;
11895 // Rescaling factor needed to compute the linear parameter
11896 // value in the mangled name.
11897 unsigned PtrRescalingFactor = 1;
11898 if (isa<CXXThisExpr>(E)) {
11899 Pos = ParamPositions[FD];
11900 } else {
11901 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11902 ->getCanonicalDecl();
11903 Pos = ParamPositions[PVD];
11904 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11905 PtrRescalingFactor = CGM.getContext()
11906 .getTypeSizeInChars(P->getPointeeType())
11907 .getQuantity();
11908 }
11909 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11910 ParamAttr.Kind = Linear;
11911         // Assume a stride of 1 for `linear` without modifiers.
11912 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11913 if (*SI) {
11914 Expr::EvalResult Result;
11915 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11916             if (const auto *DRE =
11917                     dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11918               if (const auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11919 ParamAttr.Kind = LinearWithVarStride;
11920 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11921 ParamPositions[StridePVD->getCanonicalDecl()]);
11922 }
11923 }
11924 } else {
11925 ParamAttr.StrideOrArg = Result.Val.getInt();
11926 }
11927 }
11928 // If we are using a linear clause on a pointer, we need to
11929 // rescale the value of linear_step with the byte size of the
11930 // pointee type.
11931 if (Linear == ParamAttr.Kind)
11932 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11933 ++SI;
11934 ++MI;
11935 }
11936 llvm::APSInt VLENVal;
11937 SourceLocation ExprLoc;
11938 const Expr *VLENExpr = Attr->getSimdlen();
11939 if (VLENExpr) {
11940 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11941 ExprLoc = VLENExpr->getExprLoc();
11942 }
11943 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11944 if (CGM.getTriple().isX86()) {
11945 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11946 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11947 unsigned VLEN = VLENVal.getExtValue();
11948 StringRef MangledName = Fn->getName();
11949 if (CGM.getTarget().hasFeature("sve"))
11950 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11951 MangledName, 's', 128, Fn, ExprLoc);
11952 if (CGM.getTarget().hasFeature("neon"))
11953 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11954 MangledName, 'n', 128, Fn, ExprLoc);
11955 }
11956 }
11957 FD = FD->getPreviousDecl();
11958 }
11959 }
11960
11961 namespace {
11962 /// Cleanup action for doacross support.
11963 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11964 public:
11965 static const int DoacrossFinArgs = 2;
11966
11967 private:
11968 llvm::FunctionCallee RTLFn;
11969 llvm::Value *Args[DoacrossFinArgs];
11970
11971 public:
11972   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11973 ArrayRef<llvm::Value *> CallArgs)
11974 : RTLFn(RTLFn) {
11975 assert(CallArgs.size() == DoacrossFinArgs);
11976 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11977 }
11978   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11979 if (!CGF.HaveInsertPoint())
11980 return;
11981 CGF.EmitRuntimeCall(RTLFn, Args);
11982 }
11983 };
11984 } // namespace
11985
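/// Emit initialization for a doacross loop nest: materializes an array of
/// kmp_dim structs (lower bound, upper bound, stride per loop dimension),
/// calls __kmpc_doacross_init, and pushes a cleanup that emits
/// __kmpc_doacross_fini when the region is left.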
11986 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11987 const OMPLoopDirective &D,
11988 ArrayRef<Expr *> NumIterations) {
11989 if (!CGF.HaveInsertPoint())
11990 return;
11991
11992 ASTContext &C = CGM.getContext();
11993 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11994 RecordDecl *RD;
11995 if (KmpDimTy.isNull()) {
11996 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11997 // kmp_int64 lo; // lower
11998 // kmp_int64 up; // upper
11999 // kmp_int64 st; // stride
12000 // };
12001 RD = C.buildImplicitRecord("kmp_dim");
12002 RD->startDefinition();
12003 addFieldToRecordDecl(C, RD, Int64Ty);
12004 addFieldToRecordDecl(C, RD, Int64Ty);
12005 addFieldToRecordDecl(C, RD, Int64Ty);
12006 RD->completeDefinition();
12007 KmpDimTy = C.getRecordType(RD);
12008 } else {
12009 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12010 }
12011 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12012 QualType ArrayTy =
12013 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12014
12015 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12016 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12017 enum { LowerFD = 0, UpperFD, StrideFD };
12018 // Fill dims with data.
12019 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12020 LValue DimsLVal = CGF.MakeAddrLValue(
12021 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12022 // dims.upper = num_iterations;
12023 LValue UpperLVal = CGF.EmitLValueForField(
12024 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12025 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12026 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12027 Int64Ty, NumIterations[I]->getExprLoc());
12028 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12029 // dims.stride = 1;
12030 LValue StrideLVal = CGF.EmitLValueForField(
12031 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12032 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12033 StrideLVal);
12034 }
12035
12036 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12037 // kmp_int32 num_dims, struct kmp_dim * dims);
12038 llvm::Value *Args[] = {
12039 emitUpdateLocation(CGF, D.getBeginLoc()),
12040 getThreadID(CGF, D.getBeginLoc()),
12041 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12042 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12043 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12044 CGM.VoidPtrTy)};
12045
12046 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12047 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12048 CGF.EmitRuntimeCall(RTLFn, Args);
12049 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12050 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12051 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12052 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12053 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12054 llvm::makeArrayRef(FiniArgs));
12055 }
12056
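/// Emit a doacross 'ordered depend' construct: stores the loop-iteration
/// vector (converted to kmp_int64) and calls __kmpc_doacross_post for
/// 'depend(source)' or __kmpc_doacross_wait for 'depend(sink)'.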
12057 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12058 const OMPDependClause *C) {
12059 QualType Int64Ty =
12060 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12061 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12062 QualType ArrayTy = CGM.getContext().getConstantArrayType(
12063 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12064 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12065 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12066 const Expr *CounterVal = C->getLoopData(I);
12067 assert(CounterVal);
12068 llvm::Value *CntVal = CGF.EmitScalarConversion(
12069 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12070 CounterVal->getExprLoc());
12071 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12072 /*Volatile=*/false, Int64Ty);
12073 }
12074 llvm::Value *Args[] = {
12075 emitUpdateLocation(CGF, C->getBeginLoc()),
12076 getThreadID(CGF, C->getBeginLoc()),
12077 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12078 llvm::FunctionCallee RTLFn;
12079 if (C->getDependencyKind() == OMPC_DEPEND_source) {
12080 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12081 OMPRTL___kmpc_doacross_post);
12082 } else {
12083 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12084 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12085 OMPRTL___kmpc_doacross_wait);
12086 }
12087 CGF.EmitRuntimeCall(RTLFn, Args);
12088 }
12089
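/// Emit a call to \p Callee under an artificial debug location, using a
/// nounwind runtime call when the callee is known not to throw.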
12090 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12091 llvm::FunctionCallee Callee,
12092 ArrayRef<llvm::Value *> Args) const {
12093 assert(Loc.isValid() && "Outlined function call location must be valid.");
12094 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12095
12096 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12097 if (Fn->doesNotThrow()) {
12098 CGF.EmitNounwindRuntimeCall(Fn, Args);
12099 return;
12100 }
12101 }
12102 CGF.EmitRuntimeCall(Callee, Args);
12103 }
12104
12105 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12106 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12107 ArrayRef<llvm::Value *> Args) const {
12108 emitCall(CGF, Loc, OutlinedFn, Args);
12109 }
12110
12111 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12112 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12113 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12114 HasEmittedDeclareTargetRegion = true;
12115 }
12116
12117 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12118 const VarDecl *NativeParam,
12119 const VarDecl *TargetParam) const {
12120 return CGF.GetAddrOfLocalVar(NativeParam);
12121 }
12122
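/// Return the address to use for the local variable \p VD. Handles locals of
/// untied tasks and variables with the 'omp allocate' attribute, for which
/// memory is obtained via __kmpc_alloc and released by a pushed __kmpc_free
/// cleanup.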
12123 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12124 const VarDecl *VD) {
12125 if (!VD)
12126 return Address::invalid();
12127 Address UntiedAddr = Address::invalid();
12128 Address UntiedRealAddr = Address::invalid();
12129 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12130 if (It != FunctionToUntiedTaskStackMap.end()) {
12131 const UntiedLocalVarsAddressesMap &UntiedData =
12132 UntiedLocalVarsStack[It->second];
12133 auto I = UntiedData.find(VD);
12134 if (I != UntiedData.end()) {
12135 UntiedAddr = I->second.first;
12136 UntiedRealAddr = I->second.second;
12137 }
12138 }
12139 const VarDecl *CVD = VD->getCanonicalDecl();
12140 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12141     // Decls that are not allocatable fall back to the default allocation.
12142     if (!isAllocatableDecl(VD))
12143 return UntiedAddr;
12144 llvm::Value *Size;
12145 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12146 if (CVD->getType()->isVariablyModifiedType()) {
12147 Size = CGF.getTypeSize(CVD->getType());
12148 // Align the size: ((size + align - 1) / align) * align
12149 Size = CGF.Builder.CreateNUWAdd(
12150 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12151 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12152 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12153 } else {
12154 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12155 Size = CGM.getSize(Sz.alignTo(Align));
12156 }
12157 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12158 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12159 assert(AA->getAllocator() &&
12160 "Expected allocator expression for non-default allocator.");
12161 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
12162     // According to the standard, the original allocator type is an enum
12163     // (integer). Convert to pointer type, if required.
12164 Allocator = CGF.EmitScalarConversion(
12165 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
12166 AA->getAllocator()->getExprLoc());
12167 llvm::Value *Args[] = {ThreadID, Size, Allocator};
12168
12169 llvm::Value *Addr =
12170 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12171 CGM.getModule(), OMPRTL___kmpc_alloc),
12172 Args, getName({CVD->getName(), ".void.addr"}));
12173 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12174 CGM.getModule(), OMPRTL___kmpc_free);
12175 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12176 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12177 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12178 if (UntiedAddr.isValid())
12179 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12180
12181 // Cleanup action for allocate support.
12182 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12183 llvm::FunctionCallee RTLFn;
12184 SourceLocation::UIntTy LocEncoding;
12185 Address Addr;
12186 const Expr *Allocator;
12187
12188 public:
12189 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12190 SourceLocation::UIntTy LocEncoding, Address Addr,
12191 const Expr *Allocator)
12192 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12193 Allocator(Allocator) {}
12194 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12195 if (!CGF.HaveInsertPoint())
12196 return;
12197 llvm::Value *Args[3];
12198 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12199 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12200 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12201 Addr.getPointer(), CGF.VoidPtrTy);
12202 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12203         // According to the standard, the original allocator type is an enum
12204         // (integer). Convert to pointer type, if required.
12205 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12206 CGF.getContext().VoidPtrTy,
12207 Allocator->getExprLoc());
12208 Args[2] = AllocVal;
12209
12210 CGF.EmitRuntimeCall(RTLFn, Args);
12211 }
12212 };
12213 Address VDAddr =
12214 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12215 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12216 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12217 VDAddr, AA->getAllocator());
12218 if (UntiedRealAddr.isValid())
12219 if (auto *Region =
12220 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12221 Region->emitUntiedSwitch(CGF);
12222 return VDAddr;
12223 }
12224 return UntiedAddr;
12225 }
12226
12227 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12228 const VarDecl *VD) const {
12229 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12230 if (It == FunctionToUntiedTaskStackMap.end())
12231 return false;
12232 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12233 }
12234
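/// Push the set of declarations listed in the 'nontemporal' clauses of \p S
/// for the duration of the loop directive's codegen.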
12235 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12236 CodeGenModule &CGM, const OMPLoopDirective &S)
12237 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12238 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12239 if (!NeedToPush)
12240 return;
12241 NontemporalDeclsSet &DS =
12242 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12243 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12244 for (const Stmt *Ref : C->private_refs()) {
12245 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12246 const ValueDecl *VD;
12247 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12248 VD = DRE->getDecl();
12249 } else {
12250 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12251 assert((ME->isImplicitCXXThis() ||
12252 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12253 "Expected member of current class.");
12254 VD = ME->getMemberDecl();
12255 }
12256 DS.insert(VD);
12257 }
12258 }
12259 }
12260
12261 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12262 if (!NeedToPush)
12263 return;
12264 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12265 }
12266
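/// Push the addresses of the local variables of an untied task region so
/// that getAddressOfLocalVariable can redirect accesses to the task-private
/// copies.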
12267 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12268 CodeGenFunction &CGF,
12269 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12270 std::pair<Address, Address>> &LocalVars)
12271 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12272 if (!NeedToPush)
12273 return;
12274 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12275 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12276 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12277 }
12278
12279 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12280 if (!NeedToPush)
12281 return;
12282 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12283 }
12284
12285 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12286 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12287
12288 return llvm::any_of(
12289 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12290 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12291 }
12292
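/// Collect the declarations for which lastprivate conditional analysis must
/// be disabled in the inner region: variables captured by target/task
/// regions and variables privatized by private/firstprivate/lastprivate/
/// reduction/linear clauses that are tracked by an enclosing lastprivate
/// conditional.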
12293 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12294 const OMPExecutableDirective &S,
12295 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12296 const {
12297 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12298 // Vars in target/task regions must be excluded completely.
12299 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12300 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12301 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12302 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12303 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12304 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12305 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12306 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12307 }
12308 }
12309 // Exclude vars in private clauses.
12310 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12311 for (const Expr *Ref : C->varlists()) {
12312 if (!Ref->getType()->isScalarType())
12313 continue;
12314 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12315 if (!DRE)
12316 continue;
12317 NeedToCheckForLPCs.insert(DRE->getDecl());
12318 }
12319 }
12320 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12321 for (const Expr *Ref : C->varlists()) {
12322 if (!Ref->getType()->isScalarType())
12323 continue;
12324 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12325 if (!DRE)
12326 continue;
12327 NeedToCheckForLPCs.insert(DRE->getDecl());
12328 }
12329 }
12330 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12331 for (const Expr *Ref : C->varlists()) {
12332 if (!Ref->getType()->isScalarType())
12333 continue;
12334 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12335 if (!DRE)
12336 continue;
12337 NeedToCheckForLPCs.insert(DRE->getDecl());
12338 }
12339 }
12340 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12341 for (const Expr *Ref : C->varlists()) {
12342 if (!Ref->getType()->isScalarType())
12343 continue;
12344 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12345 if (!DRE)
12346 continue;
12347 NeedToCheckForLPCs.insert(DRE->getDecl());
12348 }
12349 }
12350 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12351 for (const Expr *Ref : C->varlists()) {
12352 if (!Ref->getType()->isScalarType())
12353 continue;
12354 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12355 if (!DRE)
12356 continue;
12357 NeedToCheckForLPCs.insert(DRE->getDecl());
12358 }
12359 }
12360 for (const Decl *VD : NeedToCheckForLPCs) {
12361 for (const LastprivateConditionalData &Data :
12362 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12363 if (Data.DeclToUniqueName.count(VD) > 0) {
12364 if (!Data.Disabled)
12365 NeedToAddForLPCsAsDisabled.insert(VD);
12366 break;
12367 }
12368 }
12369 }
12370 }
12371
12372 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12373 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12374 : CGM(CGF.CGM),
12375 Action((CGM.getLangOpts().OpenMP >= 50 &&
12376 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12377 [](const OMPLastprivateClause *C) {
12378 return C->getKind() ==
12379 OMPC_LASTPRIVATE_conditional;
12380 }))
12381 ? ActionToDo::PushAsLastprivateConditional
12382 : ActionToDo::DoNotPush) {
12383 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12384 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12385 return;
12386 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12387 "Expected a push action.");
12388 LastprivateConditionalData &Data =
12389 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12390 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12391 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12392 continue;
12393
12394 for (const Expr *Ref : C->varlists()) {
12395 Data.DeclToUniqueName.insert(std::make_pair(
12396 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12397 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12398 }
12399 }
12400 Data.IVLVal = IVLVal;
12401 Data.Fn = CGF.CurFn;
12402 }
12403
12404 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12405 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12406 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12407 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12408 if (CGM.getLangOpts().OpenMP < 50)
12409 return;
12410 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12411 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12412 if (!NeedToAddForLPCsAsDisabled.empty()) {
12413 Action = ActionToDo::DisableLastprivateConditional;
12414 LastprivateConditionalData &Data =
12415 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12416 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12417 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12418 Data.Fn = CGF.CurFn;
12419 Data.Disabled = true;
12420 }
12421 }
12422
12423 CGOpenMPRuntime::LastprivateConditionalRAII
12424 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12425 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12426 return LastprivateConditionalRAII(CGF, S);
12427 }
12428
12429 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12430 if (CGM.getLangOpts().OpenMP < 50)
12431 return;
12432 if (Action == ActionToDo::DisableLastprivateConditional) {
12433 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12434 "Expected list of disabled private vars.");
12435 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12436 }
12437 if (Action == ActionToDo::PushAsLastprivateConditional) {
12438 assert(
12439 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12440 "Expected list of lastprivate conditional vars.");
12441 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12442 }
12443 }
12444
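/// Create (or reuse) the private {value, Fired flag} pair used to track
/// updates of a lastprivate conditional variable, reset the Fired flag to 0,
/// and return the address of the value field.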
12445 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12446 const VarDecl *VD) {
12447 ASTContext &C = CGM.getContext();
12448 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12449 if (I == LastprivateConditionalToTypes.end())
12450 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12451 QualType NewType;
12452 const FieldDecl *VDField;
12453 const FieldDecl *FiredField;
12454 LValue BaseLVal;
12455 auto VI = I->getSecond().find(VD);
12456 if (VI == I->getSecond().end()) {
12457     RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12458 RD->startDefinition();
12459 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12460 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12461 RD->completeDefinition();
12462 NewType = C.getRecordType(RD);
12463 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12464 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12465 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12466 } else {
12467 NewType = std::get<0>(VI->getSecond());
12468 VDField = std::get<1>(VI->getSecond());
12469 FiredField = std::get<2>(VI->getSecond());
12470 BaseLVal = std::get<3>(VI->getSecond());
12471 }
12472 LValue FiredLVal =
12473 CGF.EmitLValueForField(BaseLVal, FiredField);
12474 CGF.EmitStoreOfScalar(
12475 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12476 FiredLVal);
12477 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12478 }
12479
12480 namespace {
12481 /// Checks if the lastprivate conditional variable is referenced in LHS.
12482 class LastprivateConditionalRefChecker final
12483 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12484 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12485 const Expr *FoundE = nullptr;
12486 const Decl *FoundD = nullptr;
12487 StringRef UniqueDeclName;
12488 LValue IVLVal;
12489 llvm::Function *FoundFn = nullptr;
12490 SourceLocation Loc;
12491
12492 public:
12493   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12494 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12495 llvm::reverse(LPM)) {
12496 auto It = D.DeclToUniqueName.find(E->getDecl());
12497 if (It == D.DeclToUniqueName.end())
12498 continue;
12499 if (D.Disabled)
12500 return false;
12501 FoundE = E;
12502 FoundD = E->getDecl()->getCanonicalDecl();
12503 UniqueDeclName = It->second;
12504 IVLVal = D.IVLVal;
12505 FoundFn = D.Fn;
12506 break;
12507 }
12508 return FoundE == E;
12509 }
12510   bool VisitMemberExpr(const MemberExpr *E) {
12511 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12512 return false;
12513 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12514 llvm::reverse(LPM)) {
12515 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12516 if (It == D.DeclToUniqueName.end())
12517 continue;
12518 if (D.Disabled)
12519 return false;
12520 FoundE = E;
12521 FoundD = E->getMemberDecl()->getCanonicalDecl();
12522 UniqueDeclName = It->second;
12523 IVLVal = D.IVLVal;
12524 FoundFn = D.Fn;
12525 break;
12526 }
12527 return FoundE == E;
12528 }
12529   bool VisitStmt(const Stmt *S) {
12530 for (const Stmt *Child : S->children()) {
12531 if (!Child)
12532 continue;
12533 if (const auto *E = dyn_cast<Expr>(Child))
12534 if (!E->isGLValue())
12535 continue;
12536 if (Visit(Child))
12537 return true;
12538 }
12539 return false;
12540 }
12541   explicit LastprivateConditionalRefChecker(
12542 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12543 : LPM(LPM) {}
12544 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12545   getFoundData() const {
12546 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12547 }
12548 };
12549 } // namespace
12550
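/// Update the global copy of a lastprivate conditional variable: if the
/// current iteration-variable value satisfies last_iv <= iv, store both the
/// new counter and the new value. The update is guarded by a critical region
/// unless compiling in simd-only mode, where no parallel region can exist.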
12551 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12552 LValue IVLVal,
12553 StringRef UniqueDeclName,
12554 LValue LVal,
12555 SourceLocation Loc) {
12556 // Last updated loop counter for the lastprivate conditional var.
12557 // int<xx> last_iv = 0;
12558 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12559 llvm::Constant *LastIV =
12560 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12561 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12562 IVLVal.getAlignment().getAsAlign());
12563 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12564
12565 // Last value of the lastprivate conditional.
12566 // decltype(priv_a) last_a;
12567 llvm::Constant *Last = getOrCreateInternalVariable(
12568 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12569 cast<llvm::GlobalVariable>(Last)->setAlignment(
12570 LVal.getAlignment().getAsAlign());
12571 LValue LastLVal =
12572 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12573
12574 // Global loop counter. Required to handle inner parallel-for regions.
12575 // iv
12576 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12577
12578 // #pragma omp critical(a)
12579 // if (last_iv <= iv) {
12580 // last_iv = iv;
12581 // last_a = priv_a;
12582 // }
12583 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12584 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12585 Action.Enter(CGF);
12586 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12587       // (last_iv <= iv): check whether the variable was updated and, if so,
12588       // store the new value in the global var.
12589 llvm::Value *CmpRes;
12590 if (IVLVal.getType()->isSignedIntegerType()) {
12591 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12592 } else {
12593 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12594 "Loop iteration variable must be integer.");
12595 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12596 }
12597 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12598 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12599 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12600 // {
12601 CGF.EmitBlock(ThenBB);
12602
12603 // last_iv = iv;
12604 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12605
12606 // last_a = priv_a;
12607 switch (CGF.getEvaluationKind(LVal.getType())) {
12608 case TEK_Scalar: {
12609 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12610 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12611 break;
12612 }
12613 case TEK_Complex: {
12614 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12615 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12616 break;
12617 }
12618 case TEK_Aggregate:
12619 llvm_unreachable(
12620 "Aggregates are not supported in lastprivate conditional.");
12621 }
12622 // }
12623 CGF.EmitBranch(ExitBB);
12624     // There is no need to emit a line number for the unconditional branch.
12625 (void)ApplyDebugLocation::CreateEmpty(CGF);
12626 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12627 };
12628
12629 if (CGM.getLangOpts().OpenMPSimd) {
12630 // Do not emit as a critical region as no parallel region could be emitted.
12631 RegionCodeGenTy ThenRCG(CodeGen);
12632 ThenRCG(CGF);
12633 } else {
12634 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12635 }
12636 }
12637
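/// If \p LHS references a lastprivate conditional variable, emit the update
/// of its global copy; for references from inner parallel regions, instead
/// mark the tracked variable as updated via an atomic store to its Fired
/// flag.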
12638 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12639 const Expr *LHS) {
12640 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12641 return;
12642 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12643 if (!Checker.Visit(LHS))
12644 return;
12645 const Expr *FoundE;
12646 const Decl *FoundD;
12647 StringRef UniqueDeclName;
12648 LValue IVLVal;
12649 llvm::Function *FoundFn;
12650 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12651 Checker.getFoundData();
12652 if (FoundFn != CGF.CurFn) {
12653 // Special codegen for inner parallel regions.
12654 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12655 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12656 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12657 "Lastprivate conditional is not found in outer region.");
12658 QualType StructTy = std::get<0>(It->getSecond());
12659 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12660 LValue PrivLVal = CGF.EmitLValue(FoundE);
12661 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12662 PrivLVal.getAddress(CGF),
12663 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12664 LValue BaseLVal =
12665 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12666 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12667 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12668 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12669 FiredLVal, llvm::AtomicOrdering::Unordered,
12670 /*IsVolatile=*/true, /*isInit=*/false);
12671 return;
12672 }
12673
12674 // Private address of the lastprivate conditional in the current context.
12675 // priv_a
12676 LValue LVal = CGF.EmitLValue(FoundE);
12677 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12678 FoundE->getExprLoc());
12679 }
12680
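/// For each lastprivate conditional variable captured by \p D, load the
/// Fired flag of the tracked copy and, if it is set, emit the conditional
/// update of the global value.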
12681 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12682 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12683 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12684 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12685 return;
12686 auto Range = llvm::reverse(LastprivateConditionalStack);
12687 auto It = llvm::find_if(
12688 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12689 if (It == Range.end() || It->Fn != CGF.CurFn)
12690 return;
12691 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12692 assert(LPCI != LastprivateConditionalToTypes.end() &&
12693 "Lastprivates must be registered already.");
12694 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12695 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12696 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12697 for (const auto &Pair : It->DeclToUniqueName) {
12698 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12699 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12700 continue;
12701 auto I = LPCI->getSecond().find(Pair.first);
12702 assert(I != LPCI->getSecond().end() &&
12703            "Lastprivate must be registered already.");
12704 // bool Cmp = priv_a.Fired != 0;
12705 LValue BaseLVal = std::get<3>(I->getSecond());
12706 LValue FiredLVal =
12707 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12708 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12709 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12710 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12711 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12712 // if (Cmp) {
12713 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12714 CGF.EmitBlock(ThenBB);
12715 Address Addr = CGF.GetAddrOfLocalVar(VD);
12716 LValue LVal;
12717 if (VD->getType()->isReferenceType())
12718 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12719 AlignmentSource::Decl);
12720 else
12721 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12722 AlignmentSource::Decl);
12723 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12724 D.getBeginLoc());
12725 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12726     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
12727 // }
12728 }
12729 }
12730
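/// Copy the final value of a lastprivate conditional variable from its
/// global copy back into the private copy \p PrivLVal, provided the global
/// was ever created (i.e. the variable was updated in the region).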
12731 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12732 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12733 SourceLocation Loc) {
12734 if (CGF.getLangOpts().OpenMP < 50)
12735 return;
12736 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12737 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12738 "Unknown lastprivate conditional variable.");
12739 StringRef UniqueName = It->second;
12740 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12741 // The variable was not updated in the region - exit.
12742 if (!GV)
12743 return;
12744 LValue LPLVal = CGF.MakeAddrLValue(
12745 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12746 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12747 CGF.EmitStoreOfScalar(Res, PrivLVal);
12748 }
12749
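// CGOpenMPSIMDRuntime implements the simd-only mode, in which only 'simd'
// constructs are honored; every other runtime entry point below is a stub
// that must never be reached.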
12750 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12751 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12752 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12753 llvm_unreachable("Not supported in SIMD-only mode");
12754 }
12755
12756 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12757 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12758 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12759 llvm_unreachable("Not supported in SIMD-only mode");
12760 }
12761
12762 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12763 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12764 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12765 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12766 bool Tied, unsigned &NumberOfParts) {
12767 llvm_unreachable("Not supported in SIMD-only mode");
12768 }
12769
12770 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12771 SourceLocation Loc,
12772 llvm::Function *OutlinedFn,
12773 ArrayRef<llvm::Value *> CapturedVars,
12774 const Expr *IfCond) {
12775 llvm_unreachable("Not supported in SIMD-only mode");
12776 }
12777
12778 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12779 CodeGenFunction &CGF, StringRef CriticalName,
12780 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12781 const Expr *Hint) {
12782 llvm_unreachable("Not supported in SIMD-only mode");
12783 }
12784
12785 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12786 const RegionCodeGenTy &MasterOpGen,
12787 SourceLocation Loc) {
12788 llvm_unreachable("Not supported in SIMD-only mode");
12789 }
12790
12791 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12792 const RegionCodeGenTy &MasterOpGen,
12793 SourceLocation Loc,
12794 const Expr *Filter) {
12795 llvm_unreachable("Not supported in SIMD-only mode");
12796 }
12797
12798 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12799 SourceLocation Loc) {
12800 llvm_unreachable("Not supported in SIMD-only mode");
12801 }
12802
12803 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12804 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12805 SourceLocation Loc) {
12806 llvm_unreachable("Not supported in SIMD-only mode");
12807 }
12808
12809 void CGOpenMPSIMDRuntime::emitSingleRegion(
12810 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12811 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12812 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12813 ArrayRef<const Expr *> AssignmentOps) {
12814 llvm_unreachable("Not supported in SIMD-only mode");
12815 }
12816
12817 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12818 const RegionCodeGenTy &OrderedOpGen,
12819 SourceLocation Loc,
12820 bool IsThreads) {
12821 llvm_unreachable("Not supported in SIMD-only mode");
12822 }
12823
12824 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12825 SourceLocation Loc,
12826 OpenMPDirectiveKind Kind,
12827 bool EmitChecks,
12828 bool ForceSimpleCall) {
12829 llvm_unreachable("Not supported in SIMD-only mode");
12830 }
12831
12832 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12833 CodeGenFunction &CGF, SourceLocation Loc,
12834 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12835 bool Ordered, const DispatchRTInput &DispatchValues) {
12836 llvm_unreachable("Not supported in SIMD-only mode");
12837 }
12838
12839 void CGOpenMPSIMDRuntime::emitForStaticInit(
12840 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12841 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12842 llvm_unreachable("Not supported in SIMD-only mode");
12843 }
12844
12845 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12846 CodeGenFunction &CGF, SourceLocation Loc,
12847 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12848 llvm_unreachable("Not supported in SIMD-only mode");
12849 }
12850
12851 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12852 SourceLocation Loc,
12853 unsigned IVSize,
12854 bool IVSigned) {
12855 llvm_unreachable("Not supported in SIMD-only mode");
12856 }
12857
12858 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12859 SourceLocation Loc,
12860 OpenMPDirectiveKind DKind) {
12861 llvm_unreachable("Not supported in SIMD-only mode");
12862 }
12863
12864 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12865 SourceLocation Loc,
12866 unsigned IVSize, bool IVSigned,
12867 Address IL, Address LB,
12868 Address UB, Address ST) {
12869 llvm_unreachable("Not supported in SIMD-only mode");
12870 }
12871
12872 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12873 llvm::Value *NumThreads,
12874 SourceLocation Loc) {
12875 llvm_unreachable("Not supported in SIMD-only mode");
12876 }
12877
12878 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12879 ProcBindKind ProcBind,
12880 SourceLocation Loc) {
12881 llvm_unreachable("Not supported in SIMD-only mode");
12882 }
12883
12884 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12885 const VarDecl *VD,
12886 Address VDAddr,
12887 SourceLocation Loc) {
12888 llvm_unreachable("Not supported in SIMD-only mode");
12889 }
12890
12891 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12892 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12893 CodeGenFunction *CGF) {
12894 llvm_unreachable("Not supported in SIMD-only mode");
12895 }
12896
12897 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12898 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12899 llvm_unreachable("Not supported in SIMD-only mode");
12900 }
12901
12902 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12903 ArrayRef<const Expr *> Vars,
12904 SourceLocation Loc,
12905 llvm::AtomicOrdering AO) {
12906 llvm_unreachable("Not supported in SIMD-only mode");
12907 }
12908
12909 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12910 const OMPExecutableDirective &D,
12911 llvm::Function *TaskFunction,
12912 QualType SharedsTy, Address Shareds,
12913 const Expr *IfCond,
12914 const OMPTaskDataTy &Data) {
12915 llvm_unreachable("Not supported in SIMD-only mode");
12916 }
12917
12918 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12919 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12920 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12921 const Expr *IfCond, const OMPTaskDataTy &Data) {
12922 llvm_unreachable("Not supported in SIMD-only mode");
12923 }
12924
12925 void CGOpenMPSIMDRuntime::emitReduction(
12926 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12927 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12928 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12929 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12930 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12931 ReductionOps, Options);
12932 }
12933
12934 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12935 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12936 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12937 llvm_unreachable("Not supported in SIMD-only mode");
12938 }
12939
12940 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12941 SourceLocation Loc,
12942 bool IsWorksharingReduction) {
12943 llvm_unreachable("Not supported in SIMD-only mode");
12944 }
12945
12946 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12947 SourceLocation Loc,
12948 ReductionCodeGen &RCG,
12949 unsigned N) {
12950 llvm_unreachable("Not supported in SIMD-only mode");
12951 }
12952
12953 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12954 SourceLocation Loc,
12955 llvm::Value *ReductionsPtr,
12956 LValue SharedLVal) {
12957 llvm_unreachable("Not supported in SIMD-only mode");
12958 }
12959
12960 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12961 SourceLocation Loc) {
12962 llvm_unreachable("Not supported in SIMD-only mode");
12963 }
12964
12965 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12966 CodeGenFunction &CGF, SourceLocation Loc,
12967 OpenMPDirectiveKind CancelRegion) {
12968 llvm_unreachable("Not supported in SIMD-only mode");
12969 }
12970
12971 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12972 SourceLocation Loc, const Expr *IfCond,
12973 OpenMPDirectiveKind CancelRegion) {
12974 llvm_unreachable("Not supported in SIMD-only mode");
12975 }
12976
12977 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12978 const OMPExecutableDirective &D, StringRef ParentName,
12979 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12980 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12981 llvm_unreachable("Not supported in SIMD-only mode");
12982 }
12983
12984 void CGOpenMPSIMDRuntime::emitTargetCall(
12985 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12986 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12987 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12988 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12989 const OMPLoopDirective &D)>
12990 SizeEmitter) {
12991 llvm_unreachable("Not supported in SIMD-only mode");
12992 }
12993
12994 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12995 llvm_unreachable("Not supported in SIMD-only mode");
12996 }
12997
12998 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12999 llvm_unreachable("Not supported in SIMD-only mode");
13000 }
13001
13002 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13003 return false;
13004 }
13005
13006 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13007 const OMPExecutableDirective &D,
13008 SourceLocation Loc,
13009 llvm::Function *OutlinedFn,
13010 ArrayRef<llvm::Value *> CapturedVars) {
13011 llvm_unreachable("Not supported in SIMD-only mode");
13012 }
13013
13014 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13015 const Expr *NumTeams,
13016 const Expr *ThreadLimit,
13017 SourceLocation Loc) {
13018 llvm_unreachable("Not supported in SIMD-only mode");
13019 }
13020
13021 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13022 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13023 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13024 llvm_unreachable("Not supported in SIMD-only mode");
13025 }
13026
13027 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13028 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13029 const Expr *Device) {
13030 llvm_unreachable("Not supported in SIMD-only mode");
13031 }
13032
13033 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13034 const OMPLoopDirective &D,
13035 ArrayRef<Expr *> NumIterations) {
13036 llvm_unreachable("Not supported in SIMD-only mode");
13037 }
13038
13039 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13040 const OMPDependClause *C) {
13041 llvm_unreachable("Not supported in SIMD-only mode");
13042 }
13043
13044 const VarDecl *
13045 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13046 const VarDecl *NativeParam) const {
13047 llvm_unreachable("Not supported in SIMD-only mode");
13048 }
13049
13050 Address
13051 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13052 const VarDecl *NativeParam,
13053 const VarDecl *TargetParam) const {
13054 llvm_unreachable("Not supported in SIMD-only mode");
13055 }
13056