//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs, handling correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

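  /// Returns true if VD is referenced through an enclosing capture: a lambda
  /// capture field, the current captured-statement record, or a block capture.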
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct, handling correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct, handling correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives, supporting capture of
/// expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

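/// Lexical scope for OpenMP directives emitted in simd-only (inlined) mode:
/// emits clause pre-init statements and privatizes the captured variables.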
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

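/// Emit an lvalue for a variable referenced in an OpenMP region, rebuilding
/// the DeclRefExpr so that captures by the enclosing lambda, captured
/// statement, or block are honored.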
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

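/// Compute the size of a type in bytes as an llvm::Value, multiplying out
/// variable array dimensions when the size is not a compile-time constant.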
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

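/// Collect the values to pass to the outlined function for a captured
/// statement: VLA sizes, 'this', by-copy scalars (cast through uintptr when
/// the type is not a pointer), and the addresses of by-reference captures.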
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

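/// Reinterpret an address that was passed around as a uintptr back to a
/// pointer to DstType, with the natural alignment of that type.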
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

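/// Build a canonical parameter type, recursing through references and
/// pointers and reducing variably modified arrays to their element type.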
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

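/// Emit the prologue of an outlined function for a captured statement: build
/// the argument list, create the function, and map each captured field to a
/// local address (recording VLA size expressions along the way).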
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted
    // to uintptr. This is necessary given that the runtime library is only
    // able to deal with pointers. We can pass in the same way the VLA type
    // sizes to the outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

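/// Generate the outlined function for a captured statement. When full debug
/// info is required, a "_debug__" variant with the original parameter types
/// is emitted first, plus a wrapper (under the helper name) that casts the
/// arguments and forwards the call.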
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
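/// Copy an array element by element, invoking CopyGen for each pair of
/// destination and source element addresses; used when a plain memcpy is not
/// sufficient for the element type.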
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

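/// Emit a copy of a variable of OriginalType from SrcAddr to DestAddr. Uses
/// an aggregate assignment for trivially assignable arrays, element-by-element
/// copying otherwise, and the provided Copy expression for non-array types.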
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

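/// Emit initialization of firstprivate copies. Returns true if any of the
/// firstprivate variables are also lastprivate, so that callers emit the
/// final update.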
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, as with omp for, omp simd, omp distribute, etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target
      // regions, captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

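/// Emit the copyin clause: non-master threads copy the master thread's
/// threadprivate values into their own copies. Returns true if any copy was
/// emitted.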
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

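/// Set up private copies for lastprivate variables and remember the addresses
/// of the originals for the final update. Returns true if at least one
/// lastprivate clause was found.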
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
                if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

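/// Copy the final values of lastprivate variables back to the originals,
/// updating loop counters first when needed; the copies are guarded by
/// IsLastIterCond when it is provided.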
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

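/// Create and initialize the private copies for reduction variables, and
/// register reductions with the 'task' modifier with the OpenMP runtime.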
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
        return IsArray ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
      });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
1405 llvm_unreachable("Enexpected directive with task reductions.");
1406 }
1407
1408 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1409 EmitVarDecl(*VD);
1410 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1411 /*Volatile=*/false, TaskRedRef->getType());
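// Note (illustrative sketch, not part of the original code): a directive
// such as '#pragma omp parallel reduction(task, + : s)' takes this path;
// the descriptor returned by emitTaskReductionInit is stored into the
// compiler-generated task-reduction reference so that nested 'in_reduction'
// tasks can locate the shared reduction data.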
1412 }
1413 }
1414
1415 void CodeGenFunction::EmitOMPReductionClauseFinal(
1416 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1417 if (!HaveInsertPoint())
1418 return;
1419 llvm::SmallVector<const Expr *, 8> Privates;
1420 llvm::SmallVector<const Expr *, 8> LHSExprs;
1421 llvm::SmallVector<const Expr *, 8> RHSExprs;
1422 llvm::SmallVector<const Expr *, 8> ReductionOps;
1423 bool HasAtLeastOneReduction = false;
1424 bool IsReductionWithTaskMod = false;
1425 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1426 // Do not emit for inscan reductions.
1427 if (C->getModifier() == OMPC_REDUCTION_inscan)
1428 continue;
1429 HasAtLeastOneReduction = true;
1430 Privates.append(C->privates().begin(), C->privates().end());
1431 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1432 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1433 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1434 IsReductionWithTaskMod =
1435 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1436 }
1437 if (HasAtLeastOneReduction) {
1438 if (IsReductionWithTaskMod) {
1439 CGM.getOpenMPRuntime().emitTaskReductionFini(
1440 *this, D.getBeginLoc(),
1441 isOpenMPWorksharingDirective(D.getDirectiveKind()));
1442 }
1443 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1444 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1445 ReductionKind == OMPD_simd;
1446 bool SimpleReduction = ReductionKind == OMPD_simd;
1447 // Emit nowait reduction if the nowait clause is present or the directive
1448 // is a parallel directive (it always has an implicit barrier).
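// Illustrative example (assumed source, not emitted here): for
//   #pragma omp for nowait reduction(+ : s)
// WithNowait is true and the runtime reduction skips the trailing barrier;
// for '#pragma omp simd reduction(+ : s)' SimpleReduction is set and the
// final combine is emitted inline without runtime calls.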
1449 CGM.getOpenMPRuntime().emitReduction(
1450 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1451 {WithNowait, SimpleReduction, ReductionKind});
1452 }
1453 }
1454
1455 static void emitPostUpdateForReductionClause(
1456 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1457 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1458 if (!CGF.HaveInsertPoint())
1459 return;
1460 llvm::BasicBlock *DoneBB = nullptr;
1461 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1462 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1463 if (!DoneBB) {
1464 if (llvm::Value *Cond = CondGen(CGF)) {
1465 // When the first post-update expression is found, emit the conditional
1466 // block if it was requested.
1467 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1468 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1469 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1470 CGF.EmitBlock(ThenBB);
1471 }
1472 }
1473 CGF.EmitIgnoredExpr(PostUpdate);
1474 }
1475 }
1476 if (DoneBB)
1477 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1478 }
1479
1480 namespace {
1481 /// Codegen lambda for appending distribute lower and upper bounds to an
1482 /// outlined parallel function. This is necessary for combined constructs
1483 /// such as 'distribute parallel for'.
1484 typedef llvm::function_ref<void(CodeGenFunction &,
1485 const OMPExecutableDirective &,
1486 llvm::SmallVectorImpl<llvm::Value *> &)>
1487 CodeGenBoundParametersTy;
1488 } // anonymous namespace
1489
1490 static void
checkForLastprivateConditionalUpdate(CodeGenFunction & CGF,const OMPExecutableDirective & S)1491 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1492 const OMPExecutableDirective &S) {
1493 if (CGF.getLangOpts().OpenMP < 50)
1494 return;
1495 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1496 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1497 for (const Expr *Ref : C->varlists()) {
1498 if (!Ref->getType()->isScalarType())
1499 continue;
1500 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1501 if (!DRE)
1502 continue;
1503 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1504 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1505 }
1506 }
1507 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1508 for (const Expr *Ref : C->varlists()) {
1509 if (!Ref->getType()->isScalarType())
1510 continue;
1511 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1512 if (!DRE)
1513 continue;
1514 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1515 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1516 }
1517 }
1518 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1519 for (const Expr *Ref : C->varlists()) {
1520 if (!Ref->getType()->isScalarType())
1521 continue;
1522 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1523 if (!DRE)
1524 continue;
1525 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1526 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1527 }
1528 }
1529 // Privates need not be analyzed since they are not captured at all.
1530 // Task reductions may be skipped - tasks are ignored.
1531 // Firstprivates do not return a value but may be passed by reference - no
1532 // need to check for an updated lastprivate conditional.
1533 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1534 for (const Expr *Ref : C->varlists()) {
1535 if (!Ref->getType()->isScalarType())
1536 continue;
1537 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1538 if (!DRE)
1539 continue;
1540 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1541 }
1542 }
1543 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1544 CGF, S, PrivateDecls);
1545 }
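// Illustrative example (assumed source): with OpenMP 5.0's
//   #pragma omp for lastprivate(conditional : x)
// the store to 'x' from the logically last updating iteration must win;
// the scans above collect the scalar decls whose reduction/lastprivate/
// linear/firstprivate uses may feed such a conditional lastprivate.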
1546
1547 static void emitCommonOMPParallelDirective(
1548 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1549 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1550 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1551 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1552 llvm::Function *OutlinedFn =
1553 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1554 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1555 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1556 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1557 llvm::Value *NumThreads =
1558 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1559 /*IgnoreResultAssign=*/true);
1560 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1561 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1562 }
1563 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1564 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1565 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1566 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1567 }
1568 const Expr *IfCond = nullptr;
1569 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1570 if (C->getNameModifier() == OMPD_unknown ||
1571 C->getNameModifier() == OMPD_parallel) {
1572 IfCond = C->getCondition();
1573 break;
1574 }
1575 }
1576
1577 OMPParallelScope Scope(CGF, S);
1578 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1579 // Combining 'distribute' with 'for' requires sharing each 'distribute'
1580 // chunk's lower and upper bounds with the 'for' pragma's chunking mechanism.
1581 // The following lambda takes care of appending the lower and upper bound
1582 // parameters when necessary.
1583 CodeGenBoundParameters(CGF, S, CapturedVars);
1584 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1585 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1586 CapturedVars, IfCond);
1587 }
1588
1589 static bool isAllocatableDecl(const VarDecl *VD) {
1590 const VarDecl *CVD = VD->getCanonicalDecl();
1591 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1592 return false;
1593 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1594 // Use the default allocation.
1595 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1596 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1597 !AA->getAllocator());
1598 }
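// Illustrative example (assumed source): a declaration like
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
// is allocatable here, while a plain '#pragma omp allocate(x)' falling back
// to the default allocator is not and uses the default allocation path.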
1599
1600 static void emitEmptyBoundParameters(CodeGenFunction &,
1601 const OMPExecutableDirective &,
1602 llvm::SmallVectorImpl<llvm::Value *> &) {}
1603
1604 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1605 CodeGenFunction &CGF, const VarDecl *VD) {
1606 CodeGenModule &CGM = CGF.CGM;
1607 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1608
1609 if (!VD)
1610 return Address::invalid();
1611 const VarDecl *CVD = VD->getCanonicalDecl();
1612 if (!isAllocatableDecl(CVD))
1613 return Address::invalid();
1614 llvm::Value *Size;
1615 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1616 if (CVD->getType()->isVariablyModifiedType()) {
1617 Size = CGF.getTypeSize(CVD->getType());
1618 // Align the size: ((size + align - 1) / align) * align
1619 Size = CGF.Builder.CreateNUWAdd(
1620 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1621 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1622 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1623 } else {
1624 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1625 Size = CGM.getSize(Sz.alignTo(Align));
1626 }
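// Worked example of the rounding above (illustrative): for size = 10 bytes
// and align = 8, ((10 + 7) / 8) * 8 == 16, i.e. the size is padded up to
// the next multiple of the alignment.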
1627
1628 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1629 assert(AA->getAllocator() &&
1630 "Expected allocator expression for non-default allocator.");
1631 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1632 // According to the standard, the original allocator type is an enum
1633 // (integer). Convert to a pointer type, if required.
1634 if (Allocator->getType()->isIntegerTy())
1635 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1636 else if (Allocator->getType()->isPointerTy())
1637 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1638 CGM.VoidPtrTy);
1639
1640 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1641 CGF.Builder, Size, Allocator,
1642 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1643 llvm::CallInst *FreeCI =
1644 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1645
1646 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1647 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1648 Addr,
1649 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1650 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1651 return Address(Addr, Align);
1652 }
1653
1654 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1655 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1656 SourceLocation Loc) {
1657 CodeGenModule &CGM = CGF.CGM;
1658 if (CGM.getLangOpts().OpenMPUseTLS &&
1659 CGM.getContext().getTargetInfo().isTLSSupported())
1660 return VDAddr;
1661
1662 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1663
1664 llvm::Type *VarTy = VDAddr.getElementType();
1665 llvm::Value *Data =
1666 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1667 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1668 std::string Suffix = getNameWithSeparators({"cache", ""});
1669 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1670
1671 llvm::CallInst *ThreadPrivateCacheCall =
1672 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1673
1674 return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1675 }
1676
1677 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1678 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1679 SmallString<128> Buffer;
1680 llvm::raw_svector_ostream OS(Buffer);
1681 StringRef Sep = FirstSeparator;
1682 for (StringRef Part : Parts) {
1683 OS << Sep << Part;
1684 Sep = Separator;
1685 }
1686 return OS.str().str();
1687 }
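// Usage sketch (illustrative): getNameWithSeparators({"a", "b"}, "_", ".")
// yields "_a.b" - the first separator prefixes the first part and the
// regular separator joins the remaining ones.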
1688 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1689 if (CGM.getLangOpts().OpenMPIRBuilder) {
1690 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1691 // Check if we have any if clause associated with the directive.
1692 llvm::Value *IfCond = nullptr;
1693 if (const auto *C = S.getSingleClause<OMPIfClause>())
1694 IfCond = EmitScalarExpr(C->getCondition(),
1695 /*IgnoreResultAssign=*/true);
1696
1697 llvm::Value *NumThreads = nullptr;
1698 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1699 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1700 /*IgnoreResultAssign=*/true);
1701
1702 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1703 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1704 ProcBind = ProcBindClause->getProcBindKind();
1705
1706 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1707
1708 // The cleanup callback that finalizes all variables at the given location
1709 // and thus calls their destructors etc.
1710 auto FiniCB = [this](InsertPointTy IP) {
1711 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1712 };
1713
1714 // Privatization callback that performs the appropriate action for
1715 // shared/private/firstprivate/lastprivate/copyin/... variables.
1716 //
1717 // TODO: This defaults to shared right now.
1718 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1719 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1720 // The next line is appropriate only for variables (Val) with the
1721 // data-sharing attribute "shared".
1722 ReplVal = &Val;
1723
1724 return CodeGenIP;
1725 };
1726
1727 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1728 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1729
1730 auto BodyGenCB = [ParallelRegionBodyStmt,
1731 this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1732 llvm::BasicBlock &ContinuationBB) {
1733 OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1734 ContinuationBB);
1735 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1736 CodeGenIP, ContinuationBB);
1737 };
1738
1739 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1740 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1741 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1742 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1743 Builder.restoreIP(
1744 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1745 IfCond, NumThreads, ProcBind, S.hasCancel()));
1746 return;
1747 }
1748
1749 // Emit parallel region as a standalone region.
1750 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1751 Action.Enter(CGF);
1752 OMPPrivateScope PrivateScope(CGF);
1753 bool Copyins = CGF.EmitOMPCopyinClause(S);
1754 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1755 if (Copyins) {
1756 // Emit an implicit barrier to synchronize threads and avoid data races
1757 // when propagating the master thread's values of threadprivate variables
1758 // to the local instances of those variables in all other implicit threads.
1759 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1760 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1761 /*ForceSimpleCall=*/true);
1762 }
1763 CGF.EmitOMPPrivateClause(S, PrivateScope);
1764 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1765 (void)PrivateScope.Privatize();
1766 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1767 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1768 };
1769 {
1770 auto LPCRegion =
1771 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1772 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1773 emitEmptyBoundParameters);
1774 emitPostUpdateForReductionClause(*this, S,
1775 [](CodeGenFunction &) { return nullptr; });
1776 }
1777 // Check for outer lastprivate conditional update.
1778 checkForLastprivateConditionalUpdate(*this, S);
1779 }
1780
1781 namespace {
1782 /// RAII to handle scopes for loop transformation directives.
1783 class OMPTransformDirectiveScopeRAII {
1784 OMPLoopScope *Scope = nullptr;
1785 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1786 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1787
1788 public:
1789 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1790 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1791 Scope = new OMPLoopScope(CGF, *Dir);
1792 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1793 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1794 }
1795 }
1796 ~OMPTransformDirectiveScopeRAII() {
1797 if (!Scope)
1798 return;
1799 delete CapInfoRAII;
1800 delete CGSI;
1801 delete Scope;
1802 }
1803 };
1804 } // namespace
1805
1806 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1807 int MaxLevel, int Level = 0) {
1808 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1809 const Stmt *SimplifiedS = S->IgnoreContainers();
1810 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1811 PrettyStackTraceLoc CrashInfo(
1812 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1813 "LLVM IR generation of compound statement ('{}')");
1814
1815 // Keep track of the current cleanup stack depth, including debug scopes.
1816 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1817 for (const Stmt *CurStmt : CS->body())
1818 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1819 return;
1820 }
1821 if (SimplifiedS == NextLoop) {
1822 OMPTransformDirectiveScopeRAII PossiblyTransformDirectiveScope(CGF,
1823 SimplifiedS);
1824 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
1825 SimplifiedS = Dir->getTransformedStmt();
1826 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1827 SimplifiedS = CanonLoop->getLoopStmt();
1828 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1829 S = For->getBody();
1830 } else {
1831 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1832 "Expected canonical for loop or range-based for loop.");
1833 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1834 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1835 S = CXXFor->getBody();
1836 }
1837 if (Level + 1 < MaxLevel) {
1838 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1839 S, /*TryImperfectlyNestedLoops=*/true);
1840 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1841 return;
1842 }
1843 }
1844 CGF.EmitStmt(S);
1845 }
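// Illustrative walk-through (assumed source): for
//   #pragma omp for collapse(2)
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j)
//       body(i, j);
// emitBody is entered with MaxLevel == 2, descends from the outer loop
// (Level 0) into the inner loop (Level 1), and finally emits 'body' itself.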
1846
1847 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1848 JumpDest LoopExit) {
1849 RunCleanupsScope BodyScope(*this);
1850 // Update counter values on the current iteration.
1851 for (const Expr *UE : D.updates())
1852 EmitIgnoredExpr(UE);
1853 // Update the linear variables.
1854 // In distribute directives only loop counters may be marked as linear;
1855 // no need to generate code for them.
1856 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1857 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1858 for (const Expr *UE : C->updates())
1859 EmitIgnoredExpr(UE);
1860 }
1861 }
1862
1863 // On a continue in the body, jump to the end.
1864 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1865 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1866 for (const Expr *E : D.finals_conditions()) {
1867 if (!E)
1868 continue;
1869 // Check that the loop counter in a non-rectangular nest fits into the
1870 // iteration space.
1871 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1872 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1873 getProfileCount(D.getBody()));
1874 EmitBlock(NextBB);
1875 }
1876
1877 OMPPrivateScope InscanScope(*this);
1878 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1879 bool IsInscanRegion = InscanScope.Privatize();
1880 if (IsInscanRegion) {
1881 // Need to remember the blocks before and after the scan directive
1882 // to dispatch them correctly depending on the clause used in
1883 // this directive, inclusive or exclusive. For an inclusive scan the
1884 // natural order of the blocks is used; for the exclusive clause the
1885 // blocks must be executed in reverse order.
1886 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1887 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1888 // No need to allocate the inscan exit block; in simd mode it is selected
1889 // in the codegen for the scan directive.
1890 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1891 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1892 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1893 EmitBranch(OMPScanDispatch);
1894 EmitBlock(OMPBeforeScanBlock);
1895 }
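// Illustrative dispatch (assumed source): for
//   #pragma omp simd reduction(inscan, + : s)
// an 'inclusive(s)' scan runs the before/after blocks in source order,
// while 'exclusive(s)' makes the dispatcher emit them in reverse, as
// described in the comment above.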
1896
1897 // Emit loop variables for C++ range loops.
1898 const Stmt *Body =
1899 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1900 // Emit loop body.
1901 emitBody(*this, Body,
1902 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1903 Body, /*TryImperfectlyNestedLoops=*/true),
1904 D.getLoopsNumber());
1905
1906 // Jump to the dispatcher at the end of the loop body.
1907 if (IsInscanRegion)
1908 EmitBranch(OMPScanExitBlock);
1909
1910 // The end (updates/cleanups).
1911 EmitBlock(Continue.getBlock());
1912 BreakContinueStack.pop_back();
1913 }
1914
1915 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1916
1917 /// Emit a captured statement and return the function as well as its captured
1918 /// closure context.
1919 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1920 const CapturedStmt *S) {
1921 LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1922 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1923 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1924 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1925 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1926 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1927
1928 return {F, CapStruct.getPointer(ParentCGF)};
1929 }
1930
1931 /// Emit a call to a previously captured closure.
1932 static llvm::CallInst *
1933 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1934 llvm::ArrayRef<llvm::Value *> Args) {
1935 // Append the closure context to the argument list.
1936 SmallVector<llvm::Value *> EffectiveArgs;
1937 EffectiveArgs.reserve(Args.size() + 1);
1938 llvm::append_range(EffectiveArgs, Args);
1939 EffectiveArgs.push_back(Cap.second);
1940
1941 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1942 }
1943
1944 llvm::CanonicalLoopInfo *
1945 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1946 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1947
1948 EmitStmt(S);
1949 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1950
1951 // The last added loop is the outermost one.
1952 return OMPLoopNestStack.back();
1953 }
1954
1955 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
1956 const Stmt *SyntacticalLoop = S->getLoopStmt();
1957 if (!getLangOpts().OpenMPIRBuilder) {
1958 // Ignore if OpenMPIRBuilder is not enabled.
1959 EmitStmt(SyntacticalLoop);
1960 return;
1961 }
1962
1963 LexicalScope ForScope(*this, S->getSourceRange());
1964
1965 // Emit init statements. The Distance/LoopVar functions may reference
1966 // variable declarations that these statements contain.
1967 const Stmt *BodyStmt;
1968 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
1969 if (const Stmt *InitStmt = For->getInit())
1970 EmitStmt(InitStmt);
1971 BodyStmt = For->getBody();
1972 } else if (const auto *RangeFor =
1973 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
1974 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
1975 EmitStmt(RangeStmt);
1976 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
1977 EmitStmt(BeginStmt);
1978 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
1979 EmitStmt(EndStmt);
1980 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
1981 EmitStmt(LoopVarStmt);
1982 BodyStmt = RangeFor->getBody();
1983 } else
1984 llvm_unreachable("Expected for-stmt or range-based for-stmt");
1985
1986 // Emit closure for later use. By-value captures will be captured here.
1987 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
1988 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
1989 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
1990 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
1991
1992 // Call the distance function to get the number of iterations of the loop to
1993 // come.
1994 QualType LogicalTy = DistanceFunc->getCapturedDecl()
1995 ->getParam(0)
1996 ->getType()
1997 .getNonReferenceType();
1998 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
1999 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2000 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
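// Illustrative (the actual expression is built by Sema): for
//   for (int i = 0; i < n; i += 2)
// the distance closure stores ceil(n / 2) into CountAddr, and DistVal then
// becomes the trip count of the canonical loop emitted below.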
2001
2002 // Emit the loop structure.
2003 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2004 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2005 llvm::Value *IndVar) {
2006 Builder.restoreIP(CodeGenIP);
2007
2008 // Emit the loop body: Convert the logical iteration number to the loop
2009 // variable and emit the body.
2010 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2011 LValue LCVal = EmitLValue(LoopVarRef);
2012 Address LoopVarAddress = LCVal.getAddress(*this);
2013 emitCapturedStmtCall(*this, LoopVarClosure,
2014 {LoopVarAddress.getPointer(), IndVar});
2015
2016 RunCleanupsScope BodyScope(*this);
2017 EmitStmt(BodyStmt);
2018 };
2019 llvm::CanonicalLoopInfo *CL =
2020 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2021
2022 // Finish up the loop.
2023 Builder.restoreIP(CL->getAfterIP());
2024 ForScope.ForceCleanup();
2025
2026 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2027 OMPLoopNestStack.push_back(CL);
2028 }
2029
2030 void CodeGenFunction::EmitOMPInnerLoop(
2031 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2032 const Expr *IncExpr,
2033 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2034 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2035 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2036
2037 // Start the loop with a block that tests the condition.
2038 auto CondBlock = createBasicBlock("omp.inner.for.cond");
2039 EmitBlock(CondBlock);
2040 const SourceRange R = S.getSourceRange();
2041
2042 // If attributes are attached, push the basic block together with them.
2043 const auto &OMPED = cast<OMPExecutableDirective>(S);
2044 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2045 const Stmt *SS = ICS->getCapturedStmt();
2046 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2047 OMPLoopNestStack.clear();
2048 if (AS)
2049 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2050 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2051 SourceLocToDebugLoc(R.getEnd()));
2052 else
2053 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2054 SourceLocToDebugLoc(R.getEnd()));
2055
2056 // If there are any cleanups between here and the loop-exit scope,
2057 // create a block to stage a loop exit along.
2058 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2059 if (RequiresCleanup)
2060 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2061
2062 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2063
2064 // Emit condition.
2065 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2066 if (ExitBlock != LoopExit.getBlock()) {
2067 EmitBlock(ExitBlock);
2068 EmitBranchThroughCleanup(LoopExit);
2069 }
2070
2071 EmitBlock(LoopBody);
2072 incrementProfileCounter(&S);
2073
2074 // Create a block for the increment.
2075 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2076 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2077
2078 BodyGen(*this);
2079
2080 // Emit "IV = IV + 1" and a back-edge to the condition block.
2081 EmitBlock(Continue.getBlock());
2082 EmitIgnoredExpr(IncExpr);
2083 PostIncGen(*this);
2084 BreakContinueStack.pop_back();
2085 EmitBranch(CondBlock);
2086 LoopStack.pop();
2087 // Emit the fall-through block.
2088 EmitBlock(LoopExit.getBlock());
2089 }
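// Resulting CFG sketch (illustrative, cleanup-free case):
//   omp.inner.for.cond --(true)--> omp.inner.for.body --> omp.inner.for.inc
//        |   ^                                                  |
//        |   +--------------------------------------------------+
//        +--(false)--> omp.inner.for.end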
2090
2091 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2092 if (!HaveInsertPoint())
2093 return false;
2094 // Emit inits for the linear variables.
2095 bool HasLinears = false;
2096 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2097 for (const Expr *Init : C->inits()) {
2098 HasLinears = true;
2099 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2100 if (const auto *Ref =
2101 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2102 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2103 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2104 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2105 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2106 VD->getInit()->getType(), VK_LValue,
2107 VD->getInit()->getExprLoc());
2108 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
2109 VD->getType()),
2110 /*capturedByInit=*/false);
2111 EmitAutoVarCleanups(Emission);
2112 } else {
2113 EmitVarDecl(*VD);
2114 }
2115 }
2116 // Emit the linear steps for the linear clauses.
2117 // If a step is not constant, it is pre-calculated before the loop.
2118 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2119 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2120 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2121 // Emit calculation of the linear step.
2122 EmitIgnoredExpr(CS);
2123 }
2124 }
2125 return HasLinears;
2126 }
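// Illustrative example (assumed source): for '#pragma omp simd linear(x : k)'
// with a non-constant step 'k', the code above emits both the
// copy-initialized private 'x' and a helper variable holding the
// pre-calculated step before the loop.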
2127
2128 void CodeGenFunction::EmitOMPLinearClauseFinal(
2129 const OMPLoopDirective &D,
2130 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2131 if (!HaveInsertPoint())
2132 return;
2133 llvm::BasicBlock *DoneBB = nullptr;
2134 // Emit the final values of the linear variables.
2135 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2136 auto IC = C->varlist_begin();
2137 for (const Expr *F : C->finals()) {
2138 if (!DoneBB) {
2139 if (llvm::Value *Cond = CondGen(*this)) {
2140 // When the first post-update expression is found, emit the conditional
2141 // block if it was requested.
2142 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2143 DoneBB = createBasicBlock(".omp.linear.pu.done");
2144 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2145 EmitBlock(ThenBB);
2146 }
2147 }
2148 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2149 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2150 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2151 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2152 Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2153 CodeGenFunction::OMPPrivateScope VarScope(*this);
2154 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2155 (void)VarScope.Privatize();
2156 EmitIgnoredExpr(F);
2157 ++IC;
2158 }
2159 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2160 EmitIgnoredExpr(PostUpdate);
2161 }
2162 if (DoneBB)
2163 EmitBlock(DoneBB, /*IsFinished=*/true);
2164 }
2165
2166 static void emitAlignedClause(CodeGenFunction &CGF,
2167 const OMPExecutableDirective &D) {
2168 if (!CGF.HaveInsertPoint())
2169 return;
2170 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2171 llvm::APInt ClauseAlignment(64, 0);
2172 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2173 auto *AlignmentCI =
2174 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2175 ClauseAlignment = AlignmentCI->getValue();
2176 }
2177 for (const Expr *E : Clause->varlists()) {
2178 llvm::APInt Alignment(ClauseAlignment);
2179 if (Alignment == 0) {
2180 // OpenMP [2.8.1, Description]
2181 // If no optional parameter is specified, implementation-defined default
2182 // alignments for SIMD instructions on the target platforms are assumed.
2183 Alignment =
2184 CGF.getContext()
2185 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2186 E->getType()->getPointeeType()))
2187 .getQuantity();
2188 }
2189 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2190 "alignment is not power of 2");
2191 if (Alignment != 0) {
2192 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2193 CGF.emitAlignmentAssumption(
2194 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2195 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2196 }
2197 }
2198 }
2199 }
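// Illustrative example (assumed source): '#pragma omp simd aligned(p : 32)'
// emits a 32-byte alignment assumption on 'p'; 'aligned(p)' without an
// explicit value falls back to the target's default SIMD alignment for the
// pointee type, as computed above.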
2200
2201 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2202 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2203 if (!HaveInsertPoint())
2204 return;
2205 auto I = S.private_counters().begin();
2206 for (const Expr *E : S.counters()) {
2207 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2208 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2209 // Emit var without initialization.
2210 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2211 EmitAutoVarCleanups(VarEmission);
2212 LocalDeclMap.erase(PrivateVD);
2213 (void)LoopScope.addPrivate(VD, [&VarEmission]() {
2214 return VarEmission.getAllocatedAddress();
2215 });
2216 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2217 VD->hasGlobalStorage()) {
2218 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2219 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2220 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2221 E->getType(), VK_LValue, E->getExprLoc());
2222 return EmitLValue(&DRE).getAddress(*this);
2223 });
2224 } else {
2225 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2226 return VarEmission.getAllocatedAddress();
2227 });
2228 }
2229 ++I;
2230 }
2231 // Privatize extra loop counters used in loops for ordered(n) clauses.
2232 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2233 if (!C->getNumForLoops())
2234 continue;
2235 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2236 I < E; ++I) {
2237 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2238 const auto *VD = cast<VarDecl>(DRE->getDecl());
2239 // Override only those variables that can be captured to avoid re-emission
2240 // of the variables declared within the loops.
2241 if (DRE->refersToEnclosingVariableOrCapture()) {
2242 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2243 return CreateMemTemp(DRE->getType(), VD->getName());
2244 });
2245 }
2246 }
2247 }
2248 }
2249
2250 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2251 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2252 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2253 if (!CGF.HaveInsertPoint())
2254 return;
2255 {
2256 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2257 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2258 (void)PreCondScope.Privatize();
2259 // Get initial values of real counters.
2260 for (const Expr *I : S.inits()) {
2261 CGF.EmitIgnoredExpr(I);
2262 }
2263 }
2264 // Create temp loop control variables with their init values to support
2265 // non-rectangular loops.
2266 CodeGenFunction::OMPMapVars PreCondVars;
2267 for (const Expr *E : S.dependent_counters()) {
2268 if (!E)
2269 continue;
2270 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2271 "dependent counter must not be an iterator.");
2272 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2273 Address CounterAddr =
2274 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2275 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2276 }
2277 (void)PreCondVars.apply(CGF);
2278 for (const Expr *E : S.dependent_inits()) {
2279 if (!E)
2280 continue;
2281 CGF.EmitIgnoredExpr(E);
2282 }
2283 // Check that the loop is executed at least once.
2284 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2285 PreCondVars.restore(CGF);
2286 }
2287
2288 void CodeGenFunction::EmitOMPLinearClause(
2289 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2290 if (!HaveInsertPoint())
2291 return;
2292 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2293 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2294 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2295 for (const Expr *C : LoopDirective->counters()) {
2296 SIMDLCVs.insert(
2297 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2298 }
2299 }
2300 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2301 auto CurPrivate = C->privates().begin();
2302 for (const Expr *E : C->varlists()) {
2303 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2304 const auto *PrivateVD =
2305 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2306 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2307 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2308 // Emit private VarDecl with copy init.
2309 EmitVarDecl(*PrivateVD);
2310 return GetAddrOfLocalVar(PrivateVD);
2311 });
2312 assert(IsRegistered && "linear var already registered as private");
2313 // Silence the warning about unused variable.
2314 (void)IsRegistered;
2315 } else {
2316 EmitVarDecl(*PrivateVD);
2317 }
2318 ++CurPrivate;
2319 }
2320 }
2321 }
2322
2323 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2324 const OMPExecutableDirective &D,
2325 bool IsMonotonic) {
2326 if (!CGF.HaveInsertPoint())
2327 return;
2328 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2329 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2330 /*ignoreResult=*/true);
2331 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2332 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2333 // In the presence of a finite 'safelen', it may be unsafe to mark all
2334 // the memory instructions parallel, because loop-carried
2335 // dependences of 'safelen' iterations are possible.
2336 if (!IsMonotonic)
2337 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2338 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2339 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2340 /*ignoreResult=*/true);
2341 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2342 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2343 // In the presence of a finite 'safelen', it may be unsafe to mark all
2344 // the memory instructions parallel, because loop-carried
2345 // dependences of 'safelen' iterations are possible.
2346 CGF.LoopStack.setParallel(/*Enable=*/false);
2347 }
2348 }
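// Illustrative effect (assumed source): 'simdlen(8)' sets the vectorize
// width to 8 and, when not monotonic, keeps the parallel annotation unless
// 'safelen' is also present; a bare 'safelen(8)' sets the width to 8 but
// drops the parallel annotation, since loop-carried dependences of 'safelen'
// iterations are possible.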
2349
2350 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2351 bool IsMonotonic) {
2352 // Walk the clauses and process simdlen/safelen and related clauses.
2353 LoopStack.setParallel(!IsMonotonic);
2354 LoopStack.setVectorizeEnable();
2355 emitSimdlenSafelenClause(*this, D, IsMonotonic);
2356 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2357 if (C->getKind() == OMPC_ORDER_concurrent)
2358 LoopStack.setParallel(/*Enable=*/true);
2359 if ((D.getDirectiveKind() == OMPD_simd ||
2360 (getLangOpts().OpenMPSimd &&
2361 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2362 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2363 [](const OMPReductionClause *C) {
2364 return C->getModifier() == OMPC_REDUCTION_inscan;
2365 }))
2366 // Disable parallel access in case of prefix sum.
2367 LoopStack.setParallel(/*Enable=*/false);
2368 }
2369
2370 void CodeGenFunction::EmitOMPSimdFinal(
2371 const OMPLoopDirective &D,
2372 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2373 if (!HaveInsertPoint())
2374 return;
2375 llvm::BasicBlock *DoneBB = nullptr;
2376 auto IC = D.counters().begin();
2377 auto IPC = D.private_counters().begin();
2378 for (const Expr *F : D.finals()) {
2379 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2380 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2381 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2382 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2383 OrigVD->hasGlobalStorage() || CED) {
2384 if (!DoneBB) {
2385 if (llvm::Value *Cond = CondGen(*this)) {
2386 // When the first post-update expression is found, emit the conditional
2387 // block if it was requested.
2388 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2389 DoneBB = createBasicBlock(".omp.final.done");
2390 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2391 EmitBlock(ThenBB);
2392 }
2393 }
2394 Address OrigAddr = Address::invalid();
2395 if (CED) {
2396 OrigAddr =
2397 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2398 } else {
2399 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2400 /*RefersToEnclosingVariableOrCapture=*/false,
2401 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2402 OrigAddr = EmitLValue(&DRE).getAddress(*this);
2403 }
2404 OMPPrivateScope VarScope(*this);
2405 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2406 (void)VarScope.Privatize();
2407 EmitIgnoredExpr(F);
2408 }
2409 ++IC;
2410 ++IPC;
2411 }
2412 if (DoneBB)
2413 EmitBlock(DoneBB, /*IsFinished=*/true);
2414 }
2415
2416 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2417 const OMPLoopDirective &S,
2418 CodeGenFunction::JumpDest LoopExit) {
2419 CGF.EmitOMPLoopBody(S, LoopExit);
2420 CGF.EmitStopPoint(&S);
2421 }
2422
2423 /// Emit a helper variable and return corresponding lvalue.
2424 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2425 const DeclRefExpr *Helper) {
2426 auto VDecl = cast<VarDecl>(Helper->getDecl());
2427 CGF.EmitVarDecl(*VDecl);
2428 return CGF.EmitLValue(Helper);
2429 }
2430
2431 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2432 const RegionCodeGenTy &SimdInitGen,
2433 const RegionCodeGenTy &BodyCodeGen) {
2434 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2435 PrePostActionTy &) {
2436 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2437 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2438 SimdInitGen(CGF);
2439
2440 BodyCodeGen(CGF);
2441 };
2442 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2443 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2444 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2445
2446 BodyCodeGen(CGF);
2447 };
2448 const Expr *IfCond = nullptr;
2449 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2450 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2451 if (CGF.getLangOpts().OpenMP >= 50 &&
2452 (C->getNameModifier() == OMPD_unknown ||
2453 C->getNameModifier() == OMPD_simd)) {
2454 IfCond = C->getCondition();
2455 break;
2456 }
2457 }
2458 }
2459 if (IfCond) {
2460 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2461 } else {
2462 RegionCodeGenTy ThenRCG(ThenGen);
2463 ThenRCG(CGF);
2464 }
2465 }
2466
2467 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2468 PrePostActionTy &Action) {
2469 Action.Enter(CGF);
2470 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2471 "Expected simd directive");
2472 OMPLoopScope PreInitScope(CGF, S);
2473 // if (PreCond) {
2474 // for (IV in 0..LastIteration) BODY;
2475 // <Final counter/linear vars updates>;
2476 // }
2477 //
2478 if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2479 isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2480 isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2481 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2482 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2483 }
2484
2485 // Emit: if (PreCond) - begin.
2486 // If the condition constant folds and can be elided, avoid emitting the
2487 // whole loop.
2488 bool CondConstant;
2489 llvm::BasicBlock *ContBlock = nullptr;
2490 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2491 if (!CondConstant)
2492 return;
2493 } else {
2494 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2495 ContBlock = CGF.createBasicBlock("simd.if.end");
2496 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2497 CGF.getProfileCount(&S));
2498 CGF.EmitBlock(ThenBlock);
2499 CGF.incrementProfileCounter(&S);
2500 }
2501
2502 // Emit the loop iteration variable.
2503 const Expr *IVExpr = S.getIterationVariable();
2504 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2505 CGF.EmitVarDecl(*IVDecl);
2506 CGF.EmitIgnoredExpr(S.getInit());
2507
2508 // Emit the iteration count variable.
2509 // If it is not a variable, Sema decided to calculate the iteration count
2510 // on each iteration (e.g., it is foldable into a constant).
2511 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2512 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2513 // Emit calculation of the iterations count.
2514 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2515 }
2516
2517 emitAlignedClause(CGF, S);
2518 (void)CGF.EmitOMPLinearClauseInit(S);
2519 {
2520 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2521 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2522 CGF.EmitOMPLinearClause(S, LoopScope);
2523 CGF.EmitOMPPrivateClause(S, LoopScope);
2524 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2525 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2526 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2527 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2528 (void)LoopScope.Privatize();
2529 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2530 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2531
2532 emitCommonSimdLoop(
2533 CGF, S,
2534 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2535 CGF.EmitOMPSimdInit(S);
2536 },
2537 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2538 CGF.EmitOMPInnerLoop(
2539 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2540 [&S](CodeGenFunction &CGF) {
2541 emitOMPLoopBodyWithStopPoint(CGF, S,
2542 CodeGenFunction::JumpDest());
2543 },
2544 [](CodeGenFunction &) {});
2545 });
2546 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2547 // Emit final copy of the lastprivate variables at the end of loops.
2548 if (HasLastprivateClause)
2549 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2550 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2551 emitPostUpdateForReductionClause(CGF, S,
2552 [](CodeGenFunction &) { return nullptr; });
2553 }
2554 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2555 // Emit: if (PreCond) - end.
2556 if (ContBlock) {
2557 CGF.EmitBranch(ContBlock);
2558 CGF.EmitBlock(ContBlock, true);
2559 }
2560 }
2561
2562 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2563 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2564 OMPFirstScanLoop = true;
2565 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2566 emitOMPSimdRegion(CGF, S, Action);
2567 };
2568 {
2569 auto LPCRegion =
2570 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2571 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2572 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2573 }
2574 // Check for outer lastprivate conditional update.
2575 checkForLastprivateConditionalUpdate(*this, S);
2576 }
2577
2578 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2579 // Emit the de-sugared statement.
2580 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2581 EmitStmt(S.getTransformedStmt());
2582 }
2583
2584 void CodeGenFunction::EmitOMPOuterLoop(
2585 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2586 CodeGenFunction::OMPPrivateScope &LoopScope,
2587 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2588 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2589 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2590 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2591
2592 const Expr *IVExpr = S.getIterationVariable();
2593 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2594 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2595
2596 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2597
2598 // Start the loop with a block that tests the condition.
2599 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2600 EmitBlock(CondBlock);
2601 const SourceRange R = S.getSourceRange();
2602 OMPLoopNestStack.clear();
2603 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2604 SourceLocToDebugLoc(R.getEnd()));
2605
2606 llvm::Value *BoolCondVal = nullptr;
2607 if (!DynamicOrOrdered) {
2608 // UB = min(UB, GlobalUB) or
2609 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2610 // 'distribute parallel for')
2611 EmitIgnoredExpr(LoopArgs.EUB);
2612 // IV = LB
2613 EmitIgnoredExpr(LoopArgs.Init);
2614 // IV < UB
2615 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2616 } else {
2617 BoolCondVal =
2618 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2619 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2620 }
2621
2622 // If there are any cleanups between here and the loop-exit scope,
2623 // create a block to stage a loop exit along.
2624 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2625 if (LoopScope.requiresCleanups())
2626 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2627
2628 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2629 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2630 if (ExitBlock != LoopExit.getBlock()) {
2631 EmitBlock(ExitBlock);
2632 EmitBranchThroughCleanup(LoopExit);
2633 }
2634 EmitBlock(LoopBody);
2635
2636 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2637 // LB for loop condition and emitted it above).
2638 if (DynamicOrOrdered)
2639 EmitIgnoredExpr(LoopArgs.Init);
2640
2641 // Create a block for the increment.
2642 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2643 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2644
2645 emitCommonSimdLoop(
2646 *this, S,
2647 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2648 // Generate !llvm.loop.parallel metadata for loads and stores in loops
2649 // with dynamic/guided scheduling and without an ordered clause.
2650 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2651 CGF.LoopStack.setParallel(!IsMonotonic);
2652 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2653 if (C->getKind() == OMPC_ORDER_concurrent)
2654 CGF.LoopStack.setParallel(/*Enable=*/true);
2655 } else {
2656 CGF.EmitOMPSimdInit(S, IsMonotonic);
2657 }
2658 },
2659 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2660 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2661 SourceLocation Loc = S.getBeginLoc();
2662 // when 'distribute' is not combined with a 'for':
2663 // while (idx <= UB) { BODY; ++idx; }
2664 // when 'distribute' is combined with a 'for'
2665 // (e.g. 'distribute parallel for')
2666 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2667 CGF.EmitOMPInnerLoop(
2668 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2669 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2670 CodeGenLoop(CGF, S, LoopExit);
2671 },
2672 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2673 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2674 });
2675 });
2676
2677 EmitBlock(Continue.getBlock());
2678 BreakContinueStack.pop_back();
2679 if (!DynamicOrOrdered) {
2680 // Emit "LB = LB + Stride", "UB = UB + Stride".
2681 EmitIgnoredExpr(LoopArgs.NextLB);
2682 EmitIgnoredExpr(LoopArgs.NextUB);
2683 }
2684
2685 EmitBranch(CondBlock);
2686 OMPLoopNestStack.clear();
2687 LoopStack.pop();
2688 // Emit the fall-through block.
2689 EmitBlock(LoopExit.getBlock());
2690
2691 // Tell the runtime we are done.
2692 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2693 if (!DynamicOrOrdered)
2694 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2695 S.getDirectiveKind());
2696 };
2697 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2698 }
2699
2700 void CodeGenFunction::EmitOMPForOuterLoop(
2701 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2702 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2703 const OMPLoopArguments &LoopArgs,
2704 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2705 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2706
2707 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2708 const bool DynamicOrOrdered =
2709 Ordered || RT.isDynamic(ScheduleKind.Schedule);
2710
2711 assert((Ordered ||
2712 !RT.isStaticNonchunked(ScheduleKind.Schedule,
2713 LoopArgs.Chunk != nullptr)) &&
2714 "static non-chunked schedule does not need outer loop");
2715
2716 // Emit outer loop.
2717 //
2718 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2719 // When schedule(dynamic,chunk_size) is specified, the iterations are
2720 // distributed to threads in the team in chunks as the threads request them.
2721 // Each thread executes a chunk of iterations, then requests another chunk,
2722 // until no chunks remain to be distributed. Each chunk contains chunk_size
2723 // iterations, except for the last chunk to be distributed, which may have
2724 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2725 //
2726 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2727 // to threads in the team in chunks as the executing threads request them.
2728 // Each thread executes a chunk of iterations, then requests another chunk,
2729 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2730 // each chunk is proportional to the number of unassigned iterations divided
2731 // by the number of threads in the team, decreasing to 1. For a chunk_size
2732 // with value k (greater than 1), the size of each chunk is determined in the
2733 // same way, with the restriction that the chunks do not contain fewer than k
2734 // iterations (except for the last chunk to be assigned, which may have fewer
2735 // than k iterations).
2736 //
2737 // When schedule(auto) is specified, the decision regarding scheduling is
2738 // delegated to the compiler and/or runtime system. The programmer gives the
2739 // implementation the freedom to choose any possible mapping of iterations to
2740 // threads in the team.
2741 //
2742 // When schedule(runtime) is specified, the decision regarding scheduling is
2743 // deferred until run time, and the schedule and chunk size are taken from the
2744 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2745 // implementation defined.
2746 //
2747 // while(__kmpc_dispatch_next(&LB, &UB)) {
2748 // idx = LB;
2749 // while (idx <= UB) { BODY; ++idx;
2750 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2751 // } // inner loop
2752 // }
2753 //
2754 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2755 // When schedule(static, chunk_size) is specified, iterations are divided into
2756 // chunks of size chunk_size, and the chunks are assigned to the threads in
2757 // the team in a round-robin fashion in the order of the thread number.
2758 //
2759 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2760 // while (idx <= UB) { BODY; ++idx; } // inner loop
2761 // LB = LB + ST;
2762 // UB = UB + ST;
2763 // }
2764 //
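//
// For illustration only, a sketch of what the dynamic case lowers to
// (runtime entry-point names such as __kmpc_dispatch_init_4 and
// __kmpc_dispatch_next_4 vary with the iteration variable width and
// signedness):
//
//   #pragma omp for schedule(dynamic, 4)
//   for (int i = 0; i < N; ++i) BODY;
//
// becomes, roughly:
//
//   __kmpc_dispatch_init_4(loc, tid, kmp_sch_dynamic_chunked,
//                          /*lb=*/0, /*ub=*/N - 1, /*st=*/1, /*chunk=*/4);
//   while (__kmpc_dispatch_next_4(loc, tid, &Last, &LB, &UB, &ST)) {
//     for (int i = LB; i <= UB; ++i) BODY;
//   }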
2765
2766 const Expr *IVExpr = S.getIterationVariable();
2767 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2768 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2769
2770 if (DynamicOrOrdered) {
2771 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2772 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2773 llvm::Value *LBVal = DispatchBounds.first;
2774 llvm::Value *UBVal = DispatchBounds.second;
2775 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2776 LoopArgs.Chunk};
2777 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2778 IVSigned, Ordered, DispatchRTInputValues);
2779 } else {
2780 CGOpenMPRuntime::StaticRTInput StaticInit(
2781 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2782 LoopArgs.ST, LoopArgs.Chunk);
2783 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2784 ScheduleKind, StaticInit);
2785 }
2786
2787 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2788 const unsigned IVSize,
2789 const bool IVSigned) {
2790 if (Ordered) {
2791 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2792 IVSigned);
2793 }
2794 };
2795
2796 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2797 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2798 OuterLoopArgs.IncExpr = S.getInc();
2799 OuterLoopArgs.Init = S.getInit();
2800 OuterLoopArgs.Cond = S.getCond();
2801 OuterLoopArgs.NextLB = S.getNextLowerBound();
2802 OuterLoopArgs.NextUB = S.getNextUpperBound();
2803 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2804 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2805 }
2806
2807 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2808 const unsigned IVSize, const bool IVSigned) {}
2809
2810 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2811 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2812 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2813 const CodeGenLoopTy &CodeGenLoopContent) {
2814
2815 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2816
2817 // Emit outer loop.
2818 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
2819 // dynamic.
2820 //
2821
2822 const Expr *IVExpr = S.getIterationVariable();
2823 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2824 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2825
2826 CGOpenMPRuntime::StaticRTInput StaticInit(
2827 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2828 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2829 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2830
2831 // For combined 'distribute' and 'for', the increment expression of
2832 // 'distribute' is stored in DistInc; for 'distribute' alone, it is in Inc.
2833 Expr *IncExpr;
2834 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2835 IncExpr = S.getDistInc();
2836 else
2837 IncExpr = S.getInc();
2838
2839 // This routine is shared by 'omp distribute parallel for' and
2840 // 'omp distribute': select the right EUB expression depending on the
2841 // directive.
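// For example, for '#pragma omp distribute parallel for' the combined
// expressions below (getCombinedEnsureUpperBound() etc.) operate on the
// current 'distribute' chunk, while plain '#pragma omp distribute' uses the
// ordinary expressions covering the whole iteration space.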
2842 OMPLoopArguments OuterLoopArgs;
2843 OuterLoopArgs.LB = LoopArgs.LB;
2844 OuterLoopArgs.UB = LoopArgs.UB;
2845 OuterLoopArgs.ST = LoopArgs.ST;
2846 OuterLoopArgs.IL = LoopArgs.IL;
2847 OuterLoopArgs.Chunk = LoopArgs.Chunk;
2848 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2849 ? S.getCombinedEnsureUpperBound()
2850 : S.getEnsureUpperBound();
2851 OuterLoopArgs.IncExpr = IncExpr;
2852 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2853 ? S.getCombinedInit()
2854 : S.getInit();
2855 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2856 ? S.getCombinedCond()
2857 : S.getCond();
2858 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2859 ? S.getCombinedNextLowerBound()
2860 : S.getNextLowerBound();
2861 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2862 ? S.getCombinedNextUpperBound()
2863 : S.getNextUpperBound();
2864
2865 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2866 LoopScope, OuterLoopArgs, CodeGenLoopContent,
2867 emitEmptyOrdered);
2868 }
2869
2870 static std::pair<LValue, LValue>
2871 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2872 const OMPExecutableDirective &S) {
2873 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2874 LValue LB =
2875 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2876 LValue UB =
2877 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2878
2879 // When composing 'distribute' with 'for' (e.g. as in 'distribute
2880 // parallel for') we need to use the 'distribute'
2881 // chunk lower and upper bounds rather than the whole loop iteration
2882 // space. These are parameters to the outlined function for 'parallel'
2883 // and we copy the bounds of the previous schedule into
2884 // the current ones.
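// For example, in 'distribute parallel for' each 'parallel' region receives
// the chunk [PrevLB, PrevUB] computed by 'distribute', so the inner 'for' is
// seeded with LB = PrevLB and UB = PrevUB (converted to the iteration
// variable type) instead of 0 and LastIteration.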
2885 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2886 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2887 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2888 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2889 PrevLBVal = CGF.EmitScalarConversion(
2890 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2891 LS.getIterationVariable()->getType(),
2892 LS.getPrevLowerBoundVariable()->getExprLoc());
2893 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2894 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2895 PrevUBVal = CGF.EmitScalarConversion(
2896 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2897 LS.getIterationVariable()->getType(),
2898 LS.getPrevUpperBoundVariable()->getExprLoc());
2899
2900 CGF.EmitStoreOfScalar(PrevLBVal, LB);
2901 CGF.EmitStoreOfScalar(PrevUBVal, UB);
2902
2903 return {LB, UB};
2904 }
2905
2906 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), we
2907 /// need to use the LB and UB expressions generated by the worksharing
2908 /// code generation support, whereas in non-combined situations we would
2909 /// just emit 0 and the LastIteration expression.
2910 /// This function is necessary due to the difference in the LB and UB
2911 /// types used by the RT emission routines for 'for_static_init' and
2912 /// 'for_dispatch_init'.
2913 static std::pair<llvm::Value *, llvm::Value *>
2914 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2915 const OMPExecutableDirective &S,
2916 Address LB, Address UB) {
2917 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2918 const Expr *IVExpr = LS.getIterationVariable();
2919 // When implementing a dynamic schedule for a 'for' combined with a
2920 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2921 // is not normalized as each team only executes its own assigned
2922 // distribute chunk.
2923 QualType IteratorTy = IVExpr->getType();
2924 llvm::Value *LBVal =
2925 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2926 llvm::Value *UBVal =
2927 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2928 return {LBVal, UBVal};
2929 }
2930
2931 static void emitDistributeParallelForDistributeInnerBoundParams(
2932 CodeGenFunction &CGF, const OMPExecutableDirective &S,
2933 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2934 const auto &Dir = cast<OMPLoopDirective>(S);
2935 LValue LB =
2936 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2937 llvm::Value *LBCast =
2938 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2939 CGF.SizeTy, /*isSigned=*/false);
2940 CapturedVars.push_back(LBCast);
2941 LValue UB =
2942 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2943
2944 llvm::Value *UBCast =
2945 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2946 CGF.SizeTy, /*isSigned=*/false);
2947 CapturedVars.push_back(UBCast);
2948 }
2949
2950 static void
2951 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2952 const OMPLoopDirective &S,
2953 CodeGenFunction::JumpDest LoopExit) {
2954 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2955 PrePostActionTy &Action) {
2956 Action.Enter(CGF);
2957 bool HasCancel = false;
2958 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2959 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2960 HasCancel = D->hasCancel();
2961 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2962 HasCancel = D->hasCancel();
2963 else if (const auto *D =
2964 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2965 HasCancel = D->hasCancel();
2966 }
2967 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2968 HasCancel);
2969 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2970 emitDistributeParallelForInnerBounds,
2971 emitDistributeParallelForDispatchBounds);
2972 };
2973
2974 emitCommonOMPParallelDirective(
2975 CGF, S,
2976 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2977 CGInlinedWorksharingLoop,
2978 emitDistributeParallelForDistributeInnerBoundParams);
2979 }
2980
2981 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2982 const OMPDistributeParallelForDirective &S) {
2983 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2984 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2985 S.getDistInc());
2986 };
2987 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2988 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2989 }
2990
2991 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2992 const OMPDistributeParallelForSimdDirective &S) {
2993 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2994 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2995 S.getDistInc());
2996 };
2997 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2998 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2999 }
3000
3001 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3002 const OMPDistributeSimdDirective &S) {
3003 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3004 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3005 };
3006 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3007 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3008 }
3009
3010 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3011 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3012 // Emit SPMD target simd region as a standalone region.
3013 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3014 emitOMPSimdRegion(CGF, S, Action);
3015 };
3016 llvm::Function *Fn;
3017 llvm::Constant *Addr;
3018 // Emit target region as a standalone region.
3019 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3020 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3021 assert(Fn && Addr && "Target device function emission failed.");
3022 }
3023
3024 void CodeGenFunction::EmitOMPTargetSimdDirective(
3025 const OMPTargetSimdDirective &S) {
3026 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3027 emitOMPSimdRegion(CGF, S, Action);
3028 };
3029 emitCommonOMPTargetDirective(*this, S, CodeGen);
3030 }
3031
3032 namespace {
3033 struct ScheduleKindModifiersTy {
3034 OpenMPScheduleClauseKind Kind;
3035 OpenMPScheduleClauseModifier M1;
3036 OpenMPScheduleClauseModifier M2;
3037 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3038 OpenMPScheduleClauseModifier M1,
3039 OpenMPScheduleClauseModifier M2)
3040 : Kind(Kind), M1(M1), M2(M2) {}
3041 };
3042 } // namespace
3043
3044 bool CodeGenFunction::EmitOMPWorksharingLoop(
3045 const OMPLoopDirective &S, Expr *EUB,
3046 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3047 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3048 // Emit the loop iteration variable.
3049 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3050 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3051 EmitVarDecl(*IVDecl);
3052
3053 // Emit the iterations count variable.
3054 // If it is not a variable, Sema decided to calculate the iterations count
3055 // on each iteration (e.g., it is foldable into a constant).
3056 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3057 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3058 // Emit calculation of the iterations count.
3059 EmitIgnoredExpr(S.getCalcLastIteration());
3060 }
3061
3062 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3063
3064 bool HasLastprivateClause;
3065 // Check pre-condition.
3066 {
3067 OMPLoopScope PreInitScope(*this, S);
3068 // Skip the entire loop if we don't meet the precondition.
3069 // If the condition constant folds and can be elided, avoid emitting the
3070 // whole loop.
3071 bool CondConstant;
3072 llvm::BasicBlock *ContBlock = nullptr;
3073 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3074 if (!CondConstant)
3075 return false;
3076 } else {
3077 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3078 ContBlock = createBasicBlock("omp.precond.end");
3079 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3080 getProfileCount(&S));
3081 EmitBlock(ThenBlock);
3082 incrementProfileCounter(&S);
3083 }
3084
3085 RunCleanupsScope DoacrossCleanupScope(*this);
3086 bool Ordered = false;
3087 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3088 if (OrderedClause->getNumForLoops())
3089 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3090 else
3091 Ordered = true;
3092 }
3093
3094 llvm::DenseSet<const Expr *> EmittedFinals;
3095 emitAlignedClause(*this, S);
3096 bool HasLinears = EmitOMPLinearClauseInit(S);
3097 // Emit helper vars inits.
3098
3099 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3100 LValue LB = Bounds.first;
3101 LValue UB = Bounds.second;
3102 LValue ST =
3103 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3104 LValue IL =
3105 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3106
3107 // Emit 'then' code.
3108 {
3109 OMPPrivateScope LoopScope(*this);
3110 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3111 // Emit implicit barrier to synchronize threads and avoid data races on
3112 // initialization of firstprivate variables and post-update of
3113 // lastprivate variables.
3114 CGM.getOpenMPRuntime().emitBarrierCall(
3115 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3116 /*ForceSimpleCall=*/true);
3117 }
3118 EmitOMPPrivateClause(S, LoopScope);
3119 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3120 *this, S, EmitLValue(S.getIterationVariable()));
3121 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3122 EmitOMPReductionClauseInit(S, LoopScope);
3123 EmitOMPPrivateLoopCounters(S, LoopScope);
3124 EmitOMPLinearClause(S, LoopScope);
3125 (void)LoopScope.Privatize();
3126 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3127 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3128
3129 // Detect the loop schedule kind and chunk.
3130 const Expr *ChunkExpr = nullptr;
3131 OpenMPScheduleTy ScheduleKind;
3132 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3133 ScheduleKind.Schedule = C->getScheduleKind();
3134 ScheduleKind.M1 = C->getFirstScheduleModifier();
3135 ScheduleKind.M2 = C->getSecondScheduleModifier();
3136 ChunkExpr = C->getChunkSize();
3137 } else {
3138 // Default behaviour for schedule clause.
3139 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3140 *this, S, ScheduleKind.Schedule, ChunkExpr);
3141 }
3142 bool HasChunkSizeOne = false;
3143 llvm::Value *Chunk = nullptr;
3144 if (ChunkExpr) {
3145 Chunk = EmitScalarExpr(ChunkExpr);
3146 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3147 S.getIterationVariable()->getType(),
3148 S.getBeginLoc());
3149 Expr::EvalResult Result;
3150 if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3151 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3152 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3153 }
3154 }
3155 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3156 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3157 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3158 // If the static schedule kind is specified or if the ordered clause is
3159 // specified, and if no monotonic modifier is specified, the effect will
3160 // be as if the monotonic modifier was specified.
3161 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
3162 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
3163 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
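// For example, '#pragma omp distribute parallel for schedule(static, 1)'
// takes the StaticChunkedOne path below: the loop is emitted without an
// outer dispatch loop, using the combined condition and the distribute
// increment (IV += ST) instead of the plain condition and increment.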
3164 bool IsMonotonic =
3165 Ordered ||
3166 ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
3167 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
3168 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3169 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3170 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3171 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
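// For example, under the rule above 'schedule(static)', an 'ordered'
// clause, or an explicit 'monotonic' modifier all force IsMonotonic, while
// 'schedule(nonmonotonic: dynamic)' (and plain 'schedule(dynamic)') leave
// it false.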
3172 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3173 /* Chunked */ Chunk != nullptr) ||
3174 StaticChunkedOne) &&
3175 !Ordered) {
3176 JumpDest LoopExit =
3177 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3178 emitCommonSimdLoop(
3179 *this, S,
3180 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
3181 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3182 CGF.EmitOMPSimdInit(S, IsMonotonic);
3183 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3184 if (C->getKind() == OMPC_ORDER_concurrent)
3185 CGF.LoopStack.setParallel(/*Enable=*/true);
3186 }
3187 },
3188 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3189 &S, ScheduleKind, LoopExit,
3190 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3191 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3192 // When no chunk_size is specified, the iteration space is divided
3193 // into chunks that are approximately equal in size, and at most
3194 // one chunk is distributed to each thread. Note that the size of
3195 // the chunks is unspecified in this case.
3196 CGOpenMPRuntime::StaticRTInput StaticInit(
3197 IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3198 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3199 StaticChunkedOne ? Chunk : nullptr);
3200 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3201 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3202 StaticInit);
3203 // UB = min(UB, GlobalUB);
3204 if (!StaticChunkedOne)
3205 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3206 // IV = LB;
3207 CGF.EmitIgnoredExpr(S.getInit());
3208 // For an unchunked static schedule, generate:
3209 //
3210 // while (idx <= UB) {
3211 // BODY;
3212 // ++idx;
3213 // }
3214 //
3215 // For a static schedule with a chunk size of one:
3216 //
3217 // while (IV <= PrevUB) {
3218 // BODY;
3219 // IV += ST;
3220 // }
3221 CGF.EmitOMPInnerLoop(
3222 S, LoopScope.requiresCleanups(),
3223 StaticChunkedOne ? S.getCombinedParForInDistCond()
3224 : S.getCond(),
3225 StaticChunkedOne ? S.getDistInc() : S.getInc(),
3226 [&S, LoopExit](CodeGenFunction &CGF) {
3227 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3228 },
3229 [](CodeGenFunction &) {});
3230 });
3231 EmitBlock(LoopExit.getBlock());
3232 // Tell the runtime we are done.
3233 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3234 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3235 S.getDirectiveKind());
3236 };
3237 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3238 } else {
3239 // Emit the outer loop, which requests its work chunk [LB..UB] from the
3240 // runtime and runs the inner loop to process it.
3241 const OMPLoopArguments LoopArguments(
3242 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3243 IL.getAddress(*this), Chunk, EUB);
3244 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3245 LoopArguments, CGDispatchBounds);
3246 }
3247 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3248 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3249 return CGF.Builder.CreateIsNotNull(
3250 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3251 });
3252 }
3253 EmitOMPReductionClauseFinal(
3254 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3255 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3256 : /*Parallel only*/ OMPD_parallel);
3257 // Emit post-update of the reduction variables if IsLastIter != 0.
3258 emitPostUpdateForReductionClause(
3259 *this, S, [IL, &S](CodeGenFunction &CGF) {
3260 return CGF.Builder.CreateIsNotNull(
3261 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3262 });
3263 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3264 if (HasLastprivateClause)
3265 EmitOMPLastprivateClauseFinal(
3266 S, isOpenMPSimdDirective(S.getDirectiveKind()),
3267 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3268 }
3269 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3270 return CGF.Builder.CreateIsNotNull(
3271 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3272 });
3273 DoacrossCleanupScope.ForceCleanup();
3274 // We're now done with the loop, so jump to the continuation block.
3275 if (ContBlock) {
3276 EmitBranch(ContBlock);
3277 EmitBlock(ContBlock, /*IsFinished=*/true);
3278 }
3279 }
3280 return HasLastprivateClause;
3281 }
3282
3283 /// The following two functions generate expressions for the loop lower
3284 /// and upper bounds in the case of static and dynamic (dispatch) schedules
3285 /// of the associated 'for' or 'distribute' loop.
3286 static std::pair<LValue, LValue>
3287 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3288 const auto &LS = cast<OMPLoopDirective>(S);
3289 LValue LB =
3290 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3291 LValue UB =
3292 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3293 return {LB, UB};
3294 }
3295
3296 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3297 /// consider the lower and upper bound expressions generated by the
3298 /// worksharing loop support, but use 0 and the iteration space size as
3299 /// constants.
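/// For example, for a normalized loop with N iterations under
/// 'schedule(dynamic)', the dispatch init is seeded with the constants
/// LB = 0 and UB = LastIteration rather than with the helper LB/UB
/// variables used by the static path.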
3300 static std::pair<llvm::Value *, llvm::Value *>
3301 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3302 Address LB, Address UB) {
3303 const auto &LS = cast<OMPLoopDirective>(S);
3304 const Expr *IVExpr = LS.getIterationVariable();
3305 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3306 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3307 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3308 return {LBVal, UBVal};
3309 }
3310
3311 /// Emits internal temp array declarations for the directive with inscan
3312 /// reductions.
3313 /// The code is the following:
3314 /// \code
3315 /// size num_iters = <num_iters>;
3316 /// <type> buffer[num_iters];
3317 /// \endcode
3318 static void emitScanBasedDirectiveDecls(
3319 CodeGenFunction &CGF, const OMPLoopDirective &S,
3320 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3321 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3322 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3323 SmallVector<const Expr *, 4> Shareds;
3324 SmallVector<const Expr *, 4> Privates;
3325 SmallVector<const Expr *, 4> ReductionOps;
3326 SmallVector<const Expr *, 4> CopyArrayTemps;
3327 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3328 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3329 "Only inscan reductions are expected.");
3330 Shareds.append(C->varlist_begin(), C->varlist_end());
3331 Privates.append(C->privates().begin(), C->privates().end());
3332 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3333 CopyArrayTemps.append(C->copy_array_temps().begin(),
3334 C->copy_array_temps().end());
3335 }
3336 {
3337 // Emit buffers for each reduction variable.
3338 // ReductionCodeGen is required to correctly emit the code for array
3339 // reductions.
3340 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3341 unsigned Count = 0;
3342 auto *ITA = CopyArrayTemps.begin();
3343 for (const Expr *IRef : Privates) {
3344 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3345 // Emit variably modified arrays, used for arrays/array sections
3346 // reductions.
3347 if (PrivateVD->getType()->isVariablyModifiedType()) {
3348 RedCG.emitSharedOrigLValue(CGF, Count);
3349 RedCG.emitAggregateType(CGF, Count);
3350 }
3351 CodeGenFunction::OpaqueValueMapping DimMapping(
3352 CGF,
3353 cast<OpaqueValueExpr>(
3354 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3355 ->getSizeExpr()),
3356 RValue::get(OMPScanNumIterations));
3357 // Emit temp buffer.
3358 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3359 ++ITA;
3360 ++Count;
3361 }
3362 }
3363 }
3364
3365 /// Emits the code for the directive with inscan reductions.
3366 /// The code is the following:
3367 /// \code
3368 /// #pragma omp ...
3369 /// for (i: 0..<num_iters>) {
3370 /// <input phase>;
3371 /// buffer[i] = red;
3372 /// }
3373 /// #pragma omp master // in parallel region
3374 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3375 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3376 /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3377 /// #pragma omp barrier // in parallel region
3378 /// #pragma omp ...
3379 /// for (0..<num_iters>) {
3380 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3381 /// <scan phase>;
3382 /// }
3383 /// \endcode
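/// As a worked example (assuming num_iters == 8 and a '+' reduction): after
/// the k == 0 round buffer[i] holds sums of up to 2 adjacent inputs, after
/// k == 1 sums of up to 4, and after k == 2 == log2(8) each buffer[i] holds
/// the inclusive prefix sum over elements 0..i.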
3384 static void emitScanBasedDirective(
3385 CodeGenFunction &CGF, const OMPLoopDirective &S,
3386 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3387 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3388 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3389 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3390 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3391 SmallVector<const Expr *, 4> Privates;
3392 SmallVector<const Expr *, 4> ReductionOps;
3393 SmallVector<const Expr *, 4> LHSs;
3394 SmallVector<const Expr *, 4> RHSs;
3395 SmallVector<const Expr *, 4> CopyArrayElems;
3396 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3397 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3398 "Only inscan reductions are expected.");
3399 Privates.append(C->privates().begin(), C->privates().end());
3400 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3401 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3402 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3403 CopyArrayElems.append(C->copy_array_elems().begin(),
3404 C->copy_array_elems().end());
3405 }
3406 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3407 {
3408 // Emit loop with input phase:
3409 // #pragma omp ...
3410 // for (i: 0..<num_iters>) {
3411 // <input phase>;
3412 // buffer[i] = red;
3413 // }
3414 CGF.OMPFirstScanLoop = true;
3415 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3416 FirstGen(CGF);
3417 }
3418 // #pragma omp barrier // in parallel region
3419 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3420 &ReductionOps,
3421 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3422 Action.Enter(CGF);
3423 // Emit prefix reduction:
3424 // #pragma omp master // in parallel region
3425 // for (int k = 0; k != ceil(log2(n)); ++k)
3426 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3427 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3428 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3429 llvm::Function *F =
3430 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3431 llvm::Value *Arg =
3432 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3433 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3434 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3435 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3436 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3437 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3438 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3439 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3440 CGF.EmitBlock(LoopBB);
3441 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3442 // size pow2k = 1;
3443 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3444 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3445 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3446 // for (size i = n - 1; i >= 2 ^ k; --i)
3447 // tmp[i] op= tmp[i-pow2k];
3448 llvm::BasicBlock *InnerLoopBB =
3449 CGF.createBasicBlock("omp.inner.log.scan.body");
3450 llvm::BasicBlock *InnerExitBB =
3451 CGF.createBasicBlock("omp.inner.log.scan.exit");
3452 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3453 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3454 CGF.EmitBlock(InnerLoopBB);
3455 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3456 IVal->addIncoming(NMin1, LoopBB);
3457 {
3458 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3459 auto *ILHS = LHSs.begin();
3460 auto *IRHS = RHSs.begin();
3461 for (const Expr *CopyArrayElem : CopyArrayElems) {
3462 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3463 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3464 Address LHSAddr = Address::invalid();
3465 {
3466 CodeGenFunction::OpaqueValueMapping IdxMapping(
3467 CGF,
3468 cast<OpaqueValueExpr>(
3469 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3470 RValue::get(IVal));
3471 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3472 }
3473 PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3474 Address RHSAddr = Address::invalid();
3475 {
3476 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3477 CodeGenFunction::OpaqueValueMapping IdxMapping(
3478 CGF,
3479 cast<OpaqueValueExpr>(
3480 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3481 RValue::get(OffsetIVal));
3482 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3483 }
3484 PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3485 ++ILHS;
3486 ++IRHS;
3487 }
3488 PrivScope.Privatize();
3489 CGF.CGM.getOpenMPRuntime().emitReduction(
3490 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3491 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3492 }
3493 llvm::Value *NextIVal =
3494 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3495 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3496 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3497 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3498 CGF.EmitBlock(InnerExitBB);
3499 llvm::Value *Next =
3500 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3501 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3502 // pow2k <<= 1;
3503 llvm::Value *NextPow2K =
3504 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3505 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3506 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3507 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3508 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3509 CGF.EmitBlock(ExitBB);
3510 };
3511 if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3512 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3513 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3514 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3515 /*ForceSimpleCall=*/true);
3516 } else {
3517 RegionCodeGenTy RCG(CodeGen);
3518 RCG(CGF);
3519 }
3520
3521 CGF.OMPFirstScanLoop = false;
3522 SecondGen(CGF);
3523 }
3524
3525 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3526 const OMPLoopDirective &S,
3527 bool HasCancel) {
3528 bool HasLastprivates;
3529 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3530 [](const OMPReductionClause *C) {
3531 return C->getModifier() == OMPC_REDUCTION_inscan;
3532 })) {
3533 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3534 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3535 OMPLoopScope LoopScope(CGF, S);
3536 return CGF.EmitScalarExpr(S.getNumIterations());
3537 };
3538 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3539 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3540 CGF, S.getDirectiveKind(), HasCancel);
3541 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3542 emitForLoopBounds,
3543 emitDispatchForLoopBounds);
3544 // Emit an implicit barrier at the end.
3545 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3546 OMPD_for);
3547 };
3548 const auto &&SecondGen = [&S, HasCancel,
3549 &HasLastprivates](CodeGenFunction &CGF) {
3550 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3551 CGF, S.getDirectiveKind(), HasCancel);
3552 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3553 emitForLoopBounds,
3554 emitDispatchForLoopBounds);
3555 };
3556 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3557 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3558 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3559 } else {
3560 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3561 HasCancel);
3562 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3563 emitForLoopBounds,
3564 emitDispatchForLoopBounds);
3565 }
3566 return HasLastprivates;
3567 }
3568
3569 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3570 if (S.hasCancel())
3571 return false;
3572 for (OMPClause *C : S.clauses())
3573 if (!isa<OMPNowaitClause>(C))
3574 return false;
3575
3576 return true;
3577 }
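// For example, '#pragma omp for nowait' is eligible for the OpenMPIRBuilder
// path in EmitOMPForDirective below, while any other clause (or a 'cancel'
// inside the region) falls back to the regular codegen.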
3578
3579 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3580 bool HasLastprivates = false;
3581 bool UseOMPIRBuilder =
3582 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3583 auto &&CodeGen = [this, &S, &HasLastprivates,
3584 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3585 // Use the OpenMPIRBuilder if enabled.
3586 if (UseOMPIRBuilder) {
3587 // Emit the associated statement and get its loop representation.
3588 const Stmt *Inner = S.getRawStmt();
3589 llvm::CanonicalLoopInfo *CLI =
3590 EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3591
3592 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3593 llvm::OpenMPIRBuilder &OMPBuilder =
3594 CGM.getOpenMPRuntime().getOMPBuilder();
3595 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3596 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3597 OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
3598 return;
3599 }
3600
3601 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3602 };
3603 {
3604 auto LPCRegion =
3605 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3606 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3607 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3608 S.hasCancel());
3609 }
3610
3611 if (!UseOMPIRBuilder) {
3612 // Emit an implicit barrier at the end.
3613 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3614 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3615 }
3616 // Check for outer lastprivate conditional update.
3617 checkForLastprivateConditionalUpdate(*this, S);
3618 }
3619
3620 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3621 bool HasLastprivates = false;
3622 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3623 PrePostActionTy &) {
3624 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3625 };
3626 {
3627 auto LPCRegion =
3628 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3629 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3630 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3631 }
3632
3633 // Emit an implicit barrier at the end.
3634 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3635 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3636 // Check for outer lastprivate conditional update.
3637 checkForLastprivateConditionalUpdate(*this, S);
3638 }
3639
3640 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3641 const Twine &Name,
3642 llvm::Value *Init = nullptr) {
3643 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3644 if (Init)
3645 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3646 return LVal;
3647 }
3648
3649 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3650 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3651 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3652 bool HasLastprivates = false;
3653 auto &&CodeGen = [&S, CapturedStmt, CS,
3654 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3655 const ASTContext &C = CGF.getContext();
3656 QualType KmpInt32Ty =
3657 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3658 // Emit helper vars inits.
3659 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3660 CGF.Builder.getInt32(0));
3661 llvm::ConstantInt *GlobalUBVal = CS != nullptr
3662 ? CGF.Builder.getInt32(CS->size() - 1)
3663 : CGF.Builder.getInt32(0);
3664 LValue UB =
3665 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3666 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3667 CGF.Builder.getInt32(1));
3668 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3669 CGF.Builder.getInt32(0));
3670 // Loop counter.
3671 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3672 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3673 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3674 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3675 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3676 // Generate condition for loop.
3677 BinaryOperator *Cond = BinaryOperator::Create(
3678 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
3679 S.getBeginLoc(), FPOptionsOverride());
3680 // Increment for loop counter.
3681 UnaryOperator *Inc = UnaryOperator::Create(
3682 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
3683 S.getBeginLoc(), true, FPOptionsOverride());
3684 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3685 // Iterate through all sections and emit a switch construct:
3686 // switch (IV) {
3687 // case 0:
3688 // <SectionStmt[0]>;
3689 // break;
3690 // ...
3691 // case <NumSection> - 1:
3692 // <SectionStmt[<NumSection> - 1]>;
3693 // break;
3694 // }
3695 // .omp.sections.exit:
3696 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3697 llvm::SwitchInst *SwitchStmt =
3698 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3699 ExitBB, CS == nullptr ? 1 : CS->size());
3700 if (CS) {
3701 unsigned CaseNumber = 0;
3702 for (const Stmt *SubStmt : CS->children()) {
3703 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3704 CGF.EmitBlock(CaseBB);
3705 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3706 CGF.EmitStmt(SubStmt);
3707 CGF.EmitBranch(ExitBB);
3708 ++CaseNumber;
3709 }
3710 } else {
3711 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3712 CGF.EmitBlock(CaseBB);
3713 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3714 CGF.EmitStmt(CapturedStmt);
3715 CGF.EmitBranch(ExitBB);
3716 }
3717 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3718 };
3719
3720 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3721 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3722 // Emit implicit barrier to synchronize threads and avoid data races on
3723 // initialization of firstprivate variables and post-update of lastprivate
3724 // variables.
3725 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3726 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3727 /*ForceSimpleCall=*/true);
3728 }
3729 CGF.EmitOMPPrivateClause(S, LoopScope);
3730 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3731 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3732 CGF.EmitOMPReductionClauseInit(S, LoopScope);
3733 (void)LoopScope.Privatize();
3734 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3735 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3736
3737 // Emit static non-chunked loop.
3738 OpenMPScheduleTy ScheduleKind;
3739 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3740 CGOpenMPRuntime::StaticRTInput StaticInit(
3741 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3742 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3743 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3744 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3745 // UB = min(UB, GlobalUB);
3746 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3747 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3748 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3749 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3750 // IV = LB;
3751 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3752 // while (idx <= UB) { BODY; ++idx; }
3753 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3754 [](CodeGenFunction &) {});
3755 // Tell the runtime we are done.
3756 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3757 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3758 S.getDirectiveKind());
3759 };
3760 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3761 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3762 // Emit post-update of the reduction variables if IsLastIter != 0.
3763 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3764 return CGF.Builder.CreateIsNotNull(
3765 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3766 });
3767
3768 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3769 if (HasLastprivates)
3770 CGF.EmitOMPLastprivateClauseFinal(
3771 S, /*NoFinals=*/false,
3772 CGF.Builder.CreateIsNotNull(
3773 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3774 };
3775
3776 bool HasCancel = false;
3777 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3778 HasCancel = OSD->hasCancel();
3779 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3780 HasCancel = OPSD->hasCancel();
3781 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3782 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3783 HasCancel);
3784 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3785 // clause. Otherwise the barrier will be generated by the codegen for the
3786 // directive.
3787 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3788 // Emit implicit barrier to synchronize threads and avoid data races on
3789 // initialization of firstprivate variables.
3790 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3791 OMPD_unknown);
3792 }
3793 }
3794
3795 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3796 if (CGM.getLangOpts().OpenMPIRBuilder) {
3797 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3798 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3799 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3800
3801 auto FiniCB = [this](InsertPointTy IP) {
3802 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3803 };
3804
3805 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
3806 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3807 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3808 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3809 if (CS) {
3810 for (const Stmt *SubStmt : CS->children()) {
3811 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
3812 InsertPointTy CodeGenIP,
3813 llvm::BasicBlock &FiniBB) {
3814 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
3815 FiniBB);
3816 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
3817 FiniBB);
3818 };
3819 SectionCBVector.push_back(SectionCB);
3820 }
3821 } else {
3822 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
3823 InsertPointTy CodeGenIP,
3824 llvm::BasicBlock &FiniBB) {
3825 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3826 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
3827 FiniBB);
3828 };
3829 SectionCBVector.push_back(SectionCB);
3830 }
3831
3832 // Privatization callback that performs appropriate action for
3833 // shared/private/firstprivate/lastprivate/copyin/... variables.
3834 //
3835 // TODO: This defaults to shared right now.
3836 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3837 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
3838 // The next line is appropriate only for variables (Val) with the
3839 // data-sharing attribute "shared".
3840 ReplVal = &Val;
3841
3842 return CodeGenIP;
3843 };
3844
3845 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
3846 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
3847 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3848 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3849 Builder.restoreIP(OMPBuilder.createSections(
3850 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
3851 S.getSingleClause<OMPNowaitClause>()));
3852 return;
3853 }
3854 {
3855 auto LPCRegion =
3856 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3857 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3858 EmitSections(S);
3859 }
3860 // Emit an implicit barrier at the end.
3861 if (!S.getSingleClause<OMPNowaitClause>()) {
3862 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3863 OMPD_sections);
3864 }
3865 // Check for outer lastprivate conditional update.
3866 checkForLastprivateConditionalUpdate(*this, S);
3867 }
3868
3869 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3870 if (CGM.getLangOpts().OpenMPIRBuilder) {
3871 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3872 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3873
3874 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
3875 auto FiniCB = [this](InsertPointTy IP) {
3876 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3877 };
3878
3879 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
3880 InsertPointTy CodeGenIP,
3881 llvm::BasicBlock &FiniBB) {
3882 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3883 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
3884 CodeGenIP, FiniBB);
3885 };
3886
3887 LexicalScope Scope(*this, S.getSourceRange());
3888 EmitStopPoint(&S);
3889 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
3890
3891 return;
3892 }
3893 LexicalScope Scope(*this, S.getSourceRange());
3894 EmitStopPoint(&S);
3895 EmitStmt(S.getAssociatedStmt());
3896 }
3897
3898 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3899 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3900 llvm::SmallVector<const Expr *, 8> DestExprs;
3901 llvm::SmallVector<const Expr *, 8> SrcExprs;
3902 llvm::SmallVector<const Expr *, 8> AssignmentOps;
3903 // Check if there are any 'copyprivate' clauses associated with this
3904 // 'single' construct.
3905 // Build a list of copyprivate variables along with helper expressions
3906 // (<source>, <destination>, <destination>=<source> expressions).
3907 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3908 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3909 DestExprs.append(C->destination_exprs().begin(),
3910 C->destination_exprs().end());
3911 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3912 AssignmentOps.append(C->assignment_ops().begin(),
3913 C->assignment_ops().end());
3914 }
3915 // Emit code for the 'single' region along with the 'copyprivate' clauses.
3916 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3917 Action.Enter(CGF);
3918 OMPPrivateScope SingleScope(CGF);
3919 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
3920 CGF.EmitOMPPrivateClause(S, SingleScope);
3921 (void)SingleScope.Privatize();
3922 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3923 };
3924 {
3925 auto LPCRegion =
3926 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3927 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3928 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
3929 CopyprivateVars, DestExprs,
3930 SrcExprs, AssignmentOps);
3931 }
3932 // Emit an implicit barrier at the end (to avoid a data race on firstprivate
3933 // init) when no 'nowait' clause was specified and there is no 'copyprivate'.
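// For example, '#pragma omp single copyprivate(x)' skips this barrier: the
// copyprivate broadcast emitted by emitSingleRegion already synchronizes
// the threads.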
3934 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
3935 CGM.getOpenMPRuntime().emitBarrierCall(
3936 *this, S.getBeginLoc(),
3937 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
3938 }
3939 // Check for outer lastprivate conditional update.
3940 checkForLastprivateConditionalUpdate(*this, S);
3941 }
3942
3943 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3944 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3945 Action.Enter(CGF);
3946 CGF.EmitStmt(S.getRawStmt());
3947 };
3948 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3949 }
3950
3951 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3952 if (CGM.getLangOpts().OpenMPIRBuilder) {
3953 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3954 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3955
3956 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
3957
3958 auto FiniCB = [this](InsertPointTy IP) {
3959 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3960 };
3961
3962 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
3963 InsertPointTy CodeGenIP,
3964 llvm::BasicBlock &FiniBB) {
3965 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3966 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3967 CodeGenIP, FiniBB);
3968 };
3969
3970 LexicalScope Scope(*this, S.getSourceRange());
3971 EmitStopPoint(&S);
3972 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
3973
3974 return;
3975 }
3976 LexicalScope Scope(*this, S.getSourceRange());
3977 EmitStopPoint(&S);
3978 emitMaster(*this, S);
3979 }
3980
3981 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3982 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3983 Action.Enter(CGF);
3984 CGF.EmitStmt(S.getRawStmt());
3985 };
3986 Expr *Filter = nullptr;
3987 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
3988 Filter = FilterClause->getThreadID();
3989 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
3990 Filter);
3991 }
3992
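// Illustrative only (hypothetical user code): the 'filter' clause selects
// which thread executes the region; without it thread 0 (the primary
// thread) runs it, matching the constant-zero default for FilterVal below:
//   #pragma omp parallel
//   #pragma omp masked filter(2)
//   work(); // executed only by the thread whose id is 2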
3993 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
3994 if (CGM.getLangOpts().OpenMPIRBuilder) {
3995 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3996 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3997
3998 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
3999 const Expr *Filter = nullptr;
4000 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4001 Filter = FilterClause->getThreadID();
4002 llvm::Value *FilterVal = Filter
4003 ? EmitScalarExpr(Filter, CGM.Int32Ty)
4004 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4005
4006 auto FiniCB = [this](InsertPointTy IP) {
4007 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4008 };
4009
4010 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4011 InsertPointTy CodeGenIP,
4012 llvm::BasicBlock &FiniBB) {
4013 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4014 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
4015 CodeGenIP, FiniBB);
4016 };
4017
4018 LexicalScope Scope(*this, S.getSourceRange());
4019 EmitStopPoint(&S);
4020 Builder.restoreIP(
4021 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4022
4023 return;
4024 }
4025 LexicalScope Scope(*this, S.getSourceRange());
4026 EmitStopPoint(&S);
4027 emitMasked(*this, S);
4028 }
4029
4030 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4031 if (CGM.getLangOpts().OpenMPIRBuilder) {
4032 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4033 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4034
4035 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4036 const Expr *Hint = nullptr;
4037 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4038 Hint = HintClause->getHint();
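// Illustrative only (hypothetical user code): a named critical region with
// a synchronization hint, which is the expression captured in 'Hint':
//   #pragma omp critical(update_total) hint(omp_sync_hint_speculative)
//   total += local;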
4039
4040 // TODO: This is slightly different from what's currently being done in
4041 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4042 // about typing is final.
4043 llvm::Value *HintInst = nullptr;
4044 if (Hint)
4045 HintInst =
4046 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4047
4048 auto FiniCB = [this](InsertPointTy IP) {
4049 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4050 };
4051
4052 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4053 InsertPointTy CodeGenIP,
4054 llvm::BasicBlock &FiniBB) {
4055 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4056 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
4057 CodeGenIP, FiniBB);
4058 };
4059
4060 LexicalScope Scope(*this, S.getSourceRange());
4061 EmitStopPoint(&S);
4062 Builder.restoreIP(OMPBuilder.createCritical(
4063 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4064 HintInst));
4065
4066 return;
4067 }
4068
4069 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4070 Action.Enter(CGF);
4071 CGF.EmitStmt(S.getAssociatedStmt());
4072 };
4073 const Expr *Hint = nullptr;
4074 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4075 Hint = HintClause->getHint();
4076 LexicalScope Scope(*this, S.getSourceRange());
4077 EmitStopPoint(&S);
4078 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4079 S.getDirectiveName().getAsString(),
4080 CodeGen, S.getBeginLoc(), Hint);
4081 }
4082
4083 void CodeGenFunction::EmitOMPParallelForDirective(
4084 const OMPParallelForDirective &S) {
4085 // Emit directive as a combined directive that consists of two implicit
4086 // directives: 'parallel' and 'for'.
4087 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4088 Action.Enter(CGF);
4089 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4090 };
4091 {
4092 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4093 [](const OMPReductionClause *C) {
4094 return C->getModifier() == OMPC_REDUCTION_inscan;
4095 })) {
4096 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4097 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4098 CGCapturedStmtInfo CGSI(CR_OpenMP);
4099 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4100 OMPLoopScope LoopScope(CGF, S);
4101 return CGF.EmitScalarExpr(S.getNumIterations());
4102 };
4103 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4104 }
4105 auto LPCRegion =
4106 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4107 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4108 emitEmptyBoundParameters);
4109 }
4110 // Check for outer lastprivate conditional update.
4111 checkForLastprivateConditionalUpdate(*this, S);
4112 }
4113
4114 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4115 const OMPParallelForSimdDirective &S) {
4116 // Emit directive as a combined directive that consists of two implicit
4117 // directives: 'parallel' and 'for simd'.
4118 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4119 Action.Enter(CGF);
4120 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4121 };
4122 {
4123 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4124 [](const OMPReductionClause *C) {
4125 return C->getModifier() == OMPC_REDUCTION_inscan;
4126 })) {
4127 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4128 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4129 CGCapturedStmtInfo CGSI(CR_OpenMP);
4130 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4131 OMPLoopScope LoopScope(CGF, S);
4132 return CGF.EmitScalarExpr(S.getNumIterations());
4133 };
4134 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4135 }
4136 auto LPCRegion =
4137 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4138 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4139 emitEmptyBoundParameters);
4140 }
4141 // Check for outer lastprivate conditional update.
4142 checkForLastprivateConditionalUpdate(*this, S);
4143 }
4144
4145 void CodeGenFunction::EmitOMPParallelMasterDirective(
4146 const OMPParallelMasterDirective &S) {
4147 // Emit directive as a combined directive that consists of two implicit
4148 // directives: 'parallel' and 'master'.
4149 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4150 Action.Enter(CGF);
4151 OMPPrivateScope PrivateScope(CGF);
4152 bool Copyins = CGF.EmitOMPCopyinClause(S);
4153 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4154 if (Copyins) {
4155 // Emit an implicit barrier to synchronize threads and avoid data races
4156 // when propagating the master thread's values of threadprivate variables
4157 // to the local instances of those variables in all other implicit threads.
4158 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4159 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4160 /*ForceSimpleCall=*/true);
4161 }
4162 CGF.EmitOMPPrivateClause(S, PrivateScope);
4163 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4164 (void)PrivateScope.Privatize();
4165 emitMaster(CGF, S);
4166 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4167 };
4168 {
4169 auto LPCRegion =
4170 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4171 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4172 emitEmptyBoundParameters);
4173 emitPostUpdateForReductionClause(*this, S,
4174 [](CodeGenFunction &) { return nullptr; });
4175 }
4176 // Check for outer lastprivate conditional update.
4177 checkForLastprivateConditionalUpdate(*this, S);
4178 }
4179
4180 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4181 const OMPParallelSectionsDirective &S) {
4182 // Emit directive as a combined directive that consists of two implicit
4183 // directives: 'parallel' and 'sections'.
4184 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4185 Action.Enter(CGF);
4186 CGF.EmitSections(S);
4187 };
4188 {
4189 auto LPCRegion =
4190 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4191 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4192 emitEmptyBoundParameters);
4193 }
4194 // Check for outer lastprivate conditional update.
4195 checkForLastprivateConditionalUpdate(*this, S);
4196 }
4197
4198 namespace {
4199 /// Get the list of variables declared in the context of the untied tasks.
4200 class CheckVarsEscapingUntiedTaskDeclContext final
4201 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4202 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4203
4204 public:
4205 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4206 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4207 void VisitDeclStmt(const DeclStmt *S) {
4208 if (!S)
4209 return;
4210 // Only local vars need to be privatized; static locals can be processed as is.
4211 for (const Decl *D : S->decls()) {
4212 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4213 if (VD->hasLocalStorage())
4214 PrivateDecls.push_back(VD);
4215 }
4216 }
4217 void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
4218 void VisitCapturedStmt(const CapturedStmt *) { return; }
4219 void VisitLambdaExpr(const LambdaExpr *) { return; }
4220 void VisitBlockExpr(const BlockExpr *) { return; }
4221 void VisitStmt(const Stmt *S) {
4222 if (!S)
4223 return;
4224 for (const Stmt *Child : S->children())
4225 if (Child)
4226 Visit(Child);
4227 }
4228
4229 /// Returns the list of local variables collected for privatization.
4230 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4231 };
4232 } // anonymous namespace
4233
4234 void CodeGenFunction::EmitOMPTaskBasedDirective(
4235 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4236 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4237 OMPTaskDataTy &Data) {
4238 // Emit outlined function for task construct.
4239 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4240 auto I = CS->getCapturedDecl()->param_begin();
4241 auto PartId = std::next(I);
4242 auto TaskT = std::next(I, 4);
4243 // Check if the task is final
4244 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4245 // If the condition constant folds and can be elided, try to avoid emitting
4246 // the condition and the dead arm of the if/else.
4247 const Expr *Cond = Clause->getCondition();
4248 bool CondConstant;
4249 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4250 Data.Final.setInt(CondConstant);
4251 else
4252 Data.Final.setPointer(EvaluateExprAsBool(Cond));
4253 } else {
4254 // By default the task is not final.
4255 Data.Final.setInt(/*IntVal=*/false);
4256 }
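// Illustrative only (hypothetical user code): for a foldable condition such
// as
//   #pragma omp task final(1)
// the branch above records the constant directly, so no runtime evaluation
// of the 'final' expression is emitted.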
4257 // Check if the task has 'priority' clause.
4258 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4259 const Expr *Prio = Clause->getPriority();
4260 Data.Priority.setInt(/*IntVal=*/true);
4261 Data.Priority.setPointer(EmitScalarConversion(
4262 EmitScalarExpr(Prio), Prio->getType(),
4263 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4264 Prio->getExprLoc()));
4265 }
4266 // The first function argument for tasks is a thread id, the second one is a
4267 // part id (always 0 for tied tasks, >=0 for untied tasks).
4268 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4269 // Get list of private variables.
4270 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4271 auto IRef = C->varlist_begin();
4272 for (const Expr *IInit : C->private_copies()) {
4273 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4274 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4275 Data.PrivateVars.push_back(*IRef);
4276 Data.PrivateCopies.push_back(IInit);
4277 }
4278 ++IRef;
4279 }
4280 }
4281 EmittedAsPrivate.clear();
4282 // Get list of firstprivate variables.
4283 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4284 auto IRef = C->varlist_begin();
4285 auto IElemInitRef = C->inits().begin();
4286 for (const Expr *IInit : C->private_copies()) {
4287 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4288 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4289 Data.FirstprivateVars.push_back(*IRef);
4290 Data.FirstprivateCopies.push_back(IInit);
4291 Data.FirstprivateInits.push_back(*IElemInitRef);
4292 }
4293 ++IRef;
4294 ++IElemInitRef;
4295 }
4296 }
4297 // Get list of lastprivate variables (for taskloops).
4298 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4299 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4300 auto IRef = C->varlist_begin();
4301 auto ID = C->destination_exprs().begin();
4302 for (const Expr *IInit : C->private_copies()) {
4303 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4304 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4305 Data.LastprivateVars.push_back(*IRef);
4306 Data.LastprivateCopies.push_back(IInit);
4307 }
4308 LastprivateDstsOrigs.insert(
4309 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4310 cast<DeclRefExpr>(*IRef)));
4311 ++IRef;
4312 ++ID;
4313 }
4314 }
4315 SmallVector<const Expr *, 4> LHSs;
4316 SmallVector<const Expr *, 4> RHSs;
4317 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4318 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4319 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4320 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4321 Data.ReductionOps.append(C->reduction_ops().begin(),
4322 C->reduction_ops().end());
4323 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4324 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4325 }
4326 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4327 *this, S.getBeginLoc(), LHSs, RHSs, Data);
4328 // Build list of dependences.
4329 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4330 OMPTaskDataTy::DependData &DD =
4331 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4332 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4333 }
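// Illustrative only (hypothetical user code): each 'depend' clause becomes
// one DependData entry, e.g.
//   #pragma omp task depend(in: a) depend(out: b)
// yields two entries, with kinds 'in' and 'out' and one expression each.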
4334 // Get list of local vars for untied tasks.
4335 if (!Data.Tied) {
4336 CheckVarsEscapingUntiedTaskDeclContext Checker;
4337 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4338 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4339 Checker.getPrivateDecls().end());
4340 }
4341 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4342 CapturedRegion](CodeGenFunction &CGF,
4343 PrePostActionTy &Action) {
4344 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4345 std::pair<Address, Address>>
4346 UntiedLocalVars;
4347 // Set proper addresses for generated private copies.
4348 OMPPrivateScope Scope(CGF);
4349 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4350 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4351 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4352 enum { PrivatesParam = 2, CopyFnParam = 3 };
4353 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4354 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4355 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4356 CS->getCapturedDecl()->getParam(PrivatesParam)));
4357 // Map privates.
4358 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4359 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4360 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4361 CallArgs.push_back(PrivatesPtr);
4362 ParamTypes.push_back(PrivatesPtr->getType());
4363 for (const Expr *E : Data.PrivateVars) {
4364 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4365 Address PrivatePtr = CGF.CreateMemTemp(
4366 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4367 PrivatePtrs.emplace_back(VD, PrivatePtr);
4368 CallArgs.push_back(PrivatePtr.getPointer());
4369 ParamTypes.push_back(PrivatePtr.getType());
4370 }
4371 for (const Expr *E : Data.FirstprivateVars) {
4372 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4373 Address PrivatePtr =
4374 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4375 ".firstpriv.ptr.addr");
4376 PrivatePtrs.emplace_back(VD, PrivatePtr);
4377 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4378 CallArgs.push_back(PrivatePtr.getPointer());
4379 ParamTypes.push_back(PrivatePtr.getType());
4380 }
4381 for (const Expr *E : Data.LastprivateVars) {
4382 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4383 Address PrivatePtr =
4384 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4385 ".lastpriv.ptr.addr");
4386 PrivatePtrs.emplace_back(VD, PrivatePtr);
4387 CallArgs.push_back(PrivatePtr.getPointer());
4388 ParamTypes.push_back(PrivatePtr.getType());
4389 }
4390 for (const VarDecl *VD : Data.PrivateLocals) {
4391 QualType Ty = VD->getType().getNonReferenceType();
4392 if (VD->getType()->isLValueReferenceType())
4393 Ty = CGF.getContext().getPointerType(Ty);
4394 if (isAllocatableDecl(VD))
4395 Ty = CGF.getContext().getPointerType(Ty);
4396 Address PrivatePtr = CGF.CreateMemTemp(
4397 CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4398 auto Result = UntiedLocalVars.insert(
4399 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4400 // If the key already exists, update the entry in place.
4401 if (!Result.second)
4402 *Result.first = std::make_pair(
4403 VD, std::make_pair(PrivatePtr, Address::invalid()));
4404 CallArgs.push_back(PrivatePtr.getPointer());
4405 ParamTypes.push_back(PrivatePtr.getType());
4406 }
4407 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4408 ParamTypes, /*isVarArg=*/false);
4409 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4410 CopyFn, CopyFnTy->getPointerTo());
4411 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4412 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4413 for (const auto &Pair : LastprivateDstsOrigs) {
4414 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4415 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4416 /*RefersToEnclosingVariableOrCapture=*/
4417 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4418 Pair.second->getType(), VK_LValue,
4419 Pair.second->getExprLoc());
4420 Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4421 return CGF.EmitLValue(&DRE).getAddress(CGF);
4422 });
4423 }
4424 for (const auto &Pair : PrivatePtrs) {
4425 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4426 CGF.getContext().getDeclAlign(Pair.first));
4427 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4428 }
4429 // Adjust mapping for internal locals by mapping actual memory instead of
4430 // a pointer to this memory.
4431 for (auto &Pair : UntiedLocalVars) {
4432 if (isAllocatableDecl(Pair.first)) {
4433 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4434 Address Replacement(Ptr, CGF.getPointerAlign());
4435 Pair.second.first = Replacement;
4436 Ptr = CGF.Builder.CreateLoad(Replacement);
4437 Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4438 Pair.second.second = Replacement;
4439 } else {
4440 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4441 Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4442 Pair.second.first = Replacement;
4443 }
4444 }
4445 }
4446 if (Data.Reductions) {
4447 OMPPrivateScope FirstprivateScope(CGF);
4448 for (const auto &Pair : FirstprivatePtrs) {
4449 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4450 CGF.getContext().getDeclAlign(Pair.first));
4451 FirstprivateScope.addPrivate(Pair.first,
4452 [Replacement]() { return Replacement; });
4453 }
4454 (void)FirstprivateScope.Privatize();
4455 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4456 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4457 Data.ReductionCopies, Data.ReductionOps);
4458 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4459 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4460 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4461 RedCG.emitSharedOrigLValue(CGF, Cnt);
4462 RedCG.emitAggregateType(CGF, Cnt);
4463 // FIXME: This must be removed once the runtime library is fixed.
4464 // Emit required threadprivate variables for
4465 // initializer/combiner/finalizer.
4466 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4467 RedCG, Cnt);
4468 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4469 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4470 Replacement =
4471 Address(CGF.EmitScalarConversion(
4472 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4473 CGF.getContext().getPointerType(
4474 Data.ReductionCopies[Cnt]->getType()),
4475 Data.ReductionCopies[Cnt]->getExprLoc()),
4476 Replacement.getAlignment());
4477 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4478 Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4479 [Replacement]() { return Replacement; });
4480 }
4481 }
4482 // Privatize all private variables except for in_reduction items.
4483 (void)Scope.Privatize();
4484 SmallVector<const Expr *, 4> InRedVars;
4485 SmallVector<const Expr *, 4> InRedPrivs;
4486 SmallVector<const Expr *, 4> InRedOps;
4487 SmallVector<const Expr *, 4> TaskgroupDescriptors;
4488 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4489 auto IPriv = C->privates().begin();
4490 auto IRed = C->reduction_ops().begin();
4491 auto ITD = C->taskgroup_descriptors().begin();
4492 for (const Expr *Ref : C->varlists()) {
4493 InRedVars.emplace_back(Ref);
4494 InRedPrivs.emplace_back(*IPriv);
4495 InRedOps.emplace_back(*IRed);
4496 TaskgroupDescriptors.emplace_back(*ITD);
4497 std::advance(IPriv, 1);
4498 std::advance(IRed, 1);
4499 std::advance(ITD, 1);
4500 }
4501 }
4502 // Privatize in_reduction items here, because taskgroup descriptors must be
4503 // privatized earlier.
4504 OMPPrivateScope InRedScope(CGF);
4505 if (!InRedVars.empty()) {
4506 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4507 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4508 RedCG.emitSharedOrigLValue(CGF, Cnt);
4509 RedCG.emitAggregateType(CGF, Cnt);
4510 // The taskgroup descriptor variable is always implicitly firstprivate
4511 // and has already been privatized while processing the firstprivates.
4512 // FIXME: This must be removed once the runtime library is fixed.
4513 // Emit required threadprivate variables for
4514 // initializer/combiner/finalizer.
4515 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4516 RedCG, Cnt);
4517 llvm::Value *ReductionsPtr;
4518 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4519 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4520 TRExpr->getExprLoc());
4521 } else {
4522 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4523 }
4524 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4525 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4526 Replacement = Address(
4527 CGF.EmitScalarConversion(
4528 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4529 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4530 InRedPrivs[Cnt]->getExprLoc()),
4531 Replacement.getAlignment());
4532 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4533 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4534 [Replacement]() { return Replacement; });
4535 }
4536 }
4537 (void)InRedScope.Privatize();
4538
4539 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4540 UntiedLocalVars);
4541 Action.Enter(CGF);
4542 BodyGen(CGF);
4543 };
4544 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4545 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4546 Data.NumberOfParts);
4547 OMPLexicalScope Scope(*this, S, llvm::None,
4548 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4549 !isOpenMPSimdDirective(S.getDirectiveKind()));
4550 TaskGen(*this, OutlinedFn, Data);
4551 }
4552
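/// Synthesizes an implicit firstprivate capture of type \p Ty: creates the
/// original declaration, its private copy, and the initializing reference, and
/// registers the triple in \p Data. Used below to pass the target mapping
/// arrays (base pointers, pointers, sizes, and mappers) into the task entry.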
4553 static ImplicitParamDecl *
4554 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4555 QualType Ty, CapturedDecl *CD,
4556 SourceLocation Loc) {
4557 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4558 ImplicitParamDecl::Other);
4559 auto *OrigRef = DeclRefExpr::Create(
4560 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4561 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4562 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4563 ImplicitParamDecl::Other);
4564 auto *PrivateRef = DeclRefExpr::Create(
4565 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4566 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4567 QualType ElemType = C.getBaseElementType(Ty);
4568 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4569 ImplicitParamDecl::Other);
4570 auto *InitRef = DeclRefExpr::Create(
4571 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4572 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4573 PrivateVD->setInitStyle(VarDecl::CInit);
4574 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4575 InitRef, /*BasePath=*/nullptr,
4576 VK_RValue, FPOptionsOverride()));
4577 Data.FirstprivateVars.emplace_back(OrigRef);
4578 Data.FirstprivateCopies.emplace_back(PrivateRef);
4579 Data.FirstprivateInits.emplace_back(InitRef);
4580 return OrigVD;
4581 }
4582
4583 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4584 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4585 OMPTargetDataInfo &InputInfo) {
4586 // Emit outlined function for task construct.
4587 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4588 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4589 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4590 auto I = CS->getCapturedDecl()->param_begin();
4591 auto PartId = std::next(I);
4592 auto TaskT = std::next(I, 4);
4593 OMPTaskDataTy Data;
4594 // The task is not final.
4595 Data.Final.setInt(/*IntVal=*/false);
4596 // Get list of firstprivate variables.
4597 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4598 auto IRef = C->varlist_begin();
4599 auto IElemInitRef = C->inits().begin();
4600 for (auto *IInit : C->private_copies()) {
4601 Data.FirstprivateVars.push_back(*IRef);
4602 Data.FirstprivateCopies.push_back(IInit);
4603 Data.FirstprivateInits.push_back(*IElemInitRef);
4604 ++IRef;
4605 ++IElemInitRef;
4606 }
4607 }
4608 OMPPrivateScope TargetScope(*this);
4609 VarDecl *BPVD = nullptr;
4610 VarDecl *PVD = nullptr;
4611 VarDecl *SVD = nullptr;
4612 VarDecl *MVD = nullptr;
4613 if (InputInfo.NumberOfTargetItems > 0) {
4614 auto *CD = CapturedDecl::Create(
4615 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4616 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4617 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4618 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4619 /*IndexTypeQuals=*/0);
4620 BPVD = createImplicitFirstprivateForType(
4621 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4622 PVD = createImplicitFirstprivateForType(
4623 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4624 QualType SizesType = getContext().getConstantArrayType(
4625 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4626 ArrSize, nullptr, ArrayType::Normal,
4627 /*IndexTypeQuals=*/0);
4628 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4629 S.getBeginLoc());
4630 TargetScope.addPrivate(
4631 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4632 TargetScope.addPrivate(PVD,
4633 [&InputInfo]() { return InputInfo.PointersArray; });
4634 TargetScope.addPrivate(SVD,
4635 [&InputInfo]() { return InputInfo.SizesArray; });
4636 // If there is no user-defined mapper, the mapper array will be nullptr. In
4637 // this case, we don't need to privatize it.
4638 if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
4639 InputInfo.MappersArray.getPointer())) {
4640 MVD = createImplicitFirstprivateForType(
4641 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4642 TargetScope.addPrivate(MVD,
4643 [&InputInfo]() { return InputInfo.MappersArray; });
4644 }
4645 }
4646 (void)TargetScope.Privatize();
4647 // Build list of dependences.
4648 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4649 OMPTaskDataTy::DependData &DD =
4650 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4651 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4652 }
4653 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4654 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4655 // Set proper addresses for generated private copies.
4656 OMPPrivateScope Scope(CGF);
4657 if (!Data.FirstprivateVars.empty()) {
4658 enum { PrivatesParam = 2, CopyFnParam = 3 };
4659 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4660 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4661 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4662 CS->getCapturedDecl()->getParam(PrivatesParam)));
4663 // Map privates.
4664 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4665 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4666 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4667 CallArgs.push_back(PrivatesPtr);
4668 ParamTypes.push_back(PrivatesPtr->getType());
4669 for (const Expr *E : Data.FirstprivateVars) {
4670 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4671 Address PrivatePtr =
4672 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4673 ".firstpriv.ptr.addr");
4674 PrivatePtrs.emplace_back(VD, PrivatePtr);
4675 CallArgs.push_back(PrivatePtr.getPointer());
4676 ParamTypes.push_back(PrivatePtr.getType());
4677 }
4678 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4679 ParamTypes, /*isVarArg=*/false);
4680 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4681 CopyFn, CopyFnTy->getPointerTo());
4682 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4683 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4684 for (const auto &Pair : PrivatePtrs) {
4685 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4686 CGF.getContext().getDeclAlign(Pair.first));
4687 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4688 }
4689 }
4690 // Privatize all private variables except for in_reduction items.
4691 (void)Scope.Privatize();
4692 if (InputInfo.NumberOfTargetItems > 0) {
4693 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4694 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4695 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4696 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4697 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4698 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4699 // If MVD is nullptr, the mapper array was not privatized and needs no update.
4700 if (MVD)
4701 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4702 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4703 }
4704
4705 Action.Enter(CGF);
4706 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4707 BodyGen(CGF);
4708 };
4709 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4710 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4711 Data.NumberOfParts);
4712 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4713 IntegerLiteral IfCond(getContext(), TrueOrFalse,
4714 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4715 SourceLocation());
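// With 'nowait' the constant folds to if(1) and the task may run deferred;
// otherwise if(0) forces an undeferred task so the target region completes
// before the encountering thread continues.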
4716
4717 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4718 SharedsTy, CapturedStruct, &IfCond, Data);
4719 }
4720
4721 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4722 // Emit outlined function for task construct.
4723 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4724 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4725 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4726 const Expr *IfCond = nullptr;
4727 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4728 if (C->getNameModifier() == OMPD_unknown ||
4729 C->getNameModifier() == OMPD_task) {
4730 IfCond = C->getCondition();
4731 break;
4732 }
4733 }
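// Illustrative only (hypothetical user code): a name modifier restricts the
// 'if' clause to one directive, e.g.
//   #pragma omp task if(task: n > 16)
// matches OMPD_task above, while a plain 'if(n > 16)' has OMPD_unknown.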
4734
4735 OMPTaskDataTy Data;
4736 // Check if we should emit tied or untied task.
4737 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4738 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4739 CGF.EmitStmt(CS->getCapturedStmt());
4740 };
4741 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4742 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4743 const OMPTaskDataTy &Data) {
4744 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4745 SharedsTy, CapturedStruct, IfCond,
4746 Data);
4747 };
4748 auto LPCRegion =
4749 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4750 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4751 }
4752
4753 void CodeGenFunction::EmitOMPTaskyieldDirective(
4754 const OMPTaskyieldDirective &S) {
4755 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4756 }
4757
4758 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4759 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4760 }
4761
4762 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4763 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4764 }
4765
4766 void CodeGenFunction::EmitOMPTaskgroupDirective(
4767 const OMPTaskgroupDirective &S) {
4768 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4769 Action.Enter(CGF);
4770 if (const Expr *E = S.getReductionRef()) {
4771 SmallVector<const Expr *, 4> LHSs;
4772 SmallVector<const Expr *, 4> RHSs;
4773 OMPTaskDataTy Data;
4774 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4775 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4776 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4777 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4778 Data.ReductionOps.append(C->reduction_ops().begin(),
4779 C->reduction_ops().end());
4780 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4781 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4782 }
4783 llvm::Value *ReductionDesc =
4784 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4785 LHSs, RHSs, Data);
4786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4787 CGF.EmitVarDecl(*VD);
4788 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4789 /*Volatile=*/false, E->getType());
4790 }
4791 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4792 };
4793 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4794 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4795 }
4796
4797 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4798 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4799 ? llvm::AtomicOrdering::NotAtomic
4800 : llvm::AtomicOrdering::AcquireRelease;
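// Illustrative only (hypothetical user code): a bare flush gets
// acquire-release ordering, while the list form flushes only the listed
// variables and is emitted without that ordering:
//   #pragma omp flush        // AcquireRelease
//   #pragma omp flush(a, b)  // NotAtomic, restricted to 'a' and 'b'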
4801 CGM.getOpenMPRuntime().emitFlush(
4802 *this,
4803 [&S]() -> ArrayRef<const Expr *> {
4804 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4805 return llvm::makeArrayRef(FlushClause->varlist_begin(),
4806 FlushClause->varlist_end());
4807 return llvm::None;
4808 }(),
4809 S.getBeginLoc(), AO);
4810 }
4811
4812 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4813 const auto *DO = S.getSingleClause<OMPDepobjClause>();
4814 LValue DOLVal = EmitLValue(DO->getDepobj());
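// Illustrative only (hypothetical user code): exactly one of the clauses
// handled below appears on a 'depobj' construct, e.g.
//   #pragma omp depobj(obj) depend(inout: x)  // initialize obj
//   #pragma omp depobj(obj) update(in)        // change the dependence kind
//   #pragma omp depobj(obj) destroy           // release obj's storage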
4815 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4816 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4817 DC->getModifier());
4818 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4819 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4820 *this, Dependencies, DC->getBeginLoc());
4821 EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4822 return;
4823 }
4824 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4825 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4826 return;
4827 }
4828 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4829 CGM.getOpenMPRuntime().emitUpdateClause(
4830 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4831 return;
4832 }
4833 }
4834
4835 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4836 if (!OMPParentLoopDirectiveForScan)
4837 return;
4838 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4839 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4840 SmallVector<const Expr *, 4> Shareds;
4841 SmallVector<const Expr *, 4> Privates;
4842 SmallVector<const Expr *, 4> LHSs;
4843 SmallVector<const Expr *, 4> RHSs;
4844 SmallVector<const Expr *, 4> ReductionOps;
4845 SmallVector<const Expr *, 4> CopyOps;
4846 SmallVector<const Expr *, 4> CopyArrayTemps;
4847 SmallVector<const Expr *, 4> CopyArrayElems;
4848 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4849 if (C->getModifier() != OMPC_REDUCTION_inscan)
4850 continue;
4851 Shareds.append(C->varlist_begin(), C->varlist_end());
4852 Privates.append(C->privates().begin(), C->privates().end());
4853 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4854 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4855 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4856 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4857 CopyArrayTemps.append(C->copy_array_temps().begin(),
4858 C->copy_array_temps().end());
4859 CopyArrayElems.append(C->copy_array_elems().begin(),
4860 C->copy_array_elems().end());
4861 }
4862 if (ParentDir.getDirectiveKind() == OMPD_simd ||
4863 (getLangOpts().OpenMPSimd &&
4864 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4865 // For the simd directive and simd-based directives in simd-only mode, use the
4866 // following codegen:
4867 // int x = 0;
4868 // #pragma omp simd reduction(inscan, +: x)
4869 // for (..) {
4870 // <first part>
4871 // #pragma omp scan inclusive(x)
4872 // <second part>
4873 // }
4874 // is transformed to:
4875 // int x = 0;
4876 // for (..) {
4877 // int x_priv = 0;
4878 // <first part>
4879 // x = x_priv + x;
4880 // x_priv = x;
4881 // <second part>
4882 // }
4883 // and
4884 // int x = 0;
4885 // #pragma omp simd reduction(inscan, +: x)
4886 // for (..) {
4887 // <first part>
4888 // #pragma omp scan exclusive(x)
4889 // <second part>
4890 // }
4891 // to
4892 // int x = 0;
4893 // for (..) {
4894 // int x_priv = 0;
4895 // <second part>
4896 // int temp = x;
4897 // x = x_priv + x;
4898 // x_priv = temp;
4899 // <first part>
4900 // }
4901 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4902 EmitBranch(IsInclusive
4903 ? OMPScanReduce
4904 : BreakContinueStack.back().ContinueBlock.getBlock());
4905 EmitBlock(OMPScanDispatch);
4906 {
4907 // New scope for correct construction/destruction of temp variables for
4908 // exclusive scan.
4909 LexicalScope Scope(*this, S.getSourceRange());
4910 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4911 EmitBlock(OMPScanReduce);
4912 if (!IsInclusive) {
4913 // Create temp var and copy LHS value to this temp value.
4914 // TMP = LHS;
4915 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4916 const Expr *PrivateExpr = Privates[I];
4917 const Expr *TempExpr = CopyArrayTemps[I];
4918 EmitAutoVarDecl(
4919 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4920 LValue DestLVal = EmitLValue(TempExpr);
4921 LValue SrcLVal = EmitLValue(LHSs[I]);
4922 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4923 SrcLVal.getAddress(*this),
4924 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4925 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4926 CopyOps[I]);
4927 }
4928 }
4929 CGM.getOpenMPRuntime().emitReduction(
4930 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4931 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
4932 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4933 const Expr *PrivateExpr = Privates[I];
4934 LValue DestLVal;
4935 LValue SrcLVal;
4936 if (IsInclusive) {
4937 DestLVal = EmitLValue(RHSs[I]);
4938 SrcLVal = EmitLValue(LHSs[I]);
4939 } else {
4940 const Expr *TempExpr = CopyArrayTemps[I];
4941 DestLVal = EmitLValue(RHSs[I]);
4942 SrcLVal = EmitLValue(TempExpr);
4943 }
4944 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4945 SrcLVal.getAddress(*this),
4946 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4947 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4948 CopyOps[I]);
4949 }
4950 }
4951 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4952 OMPScanExitBlock = IsInclusive
4953 ? BreakContinueStack.back().ContinueBlock.getBlock()
4954 : OMPScanReduce;
4955 EmitBlock(OMPAfterScanBlock);
4956 return;
4957 }
4958 if (!IsInclusive) {
4959 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4960 EmitBlock(OMPScanExitBlock);
4961 }
4962 if (OMPFirstScanLoop) {
4963 // Emit buffer[i] = red; at the end of the input phase.
4964 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4965 .getIterationVariable()
4966 ->IgnoreParenImpCasts();
4967 LValue IdxLVal = EmitLValue(IVExpr);
4968 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4969 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4970 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4971 const Expr *PrivateExpr = Privates[I];
4972 const Expr *OrigExpr = Shareds[I];
4973 const Expr *CopyArrayElem = CopyArrayElems[I];
4974 OpaqueValueMapping IdxMapping(
4975 *this,
4976 cast<OpaqueValueExpr>(
4977 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4978 RValue::get(IdxVal));
4979 LValue DestLVal = EmitLValue(CopyArrayElem);
4980 LValue SrcLVal = EmitLValue(OrigExpr);
4981 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4982 SrcLVal.getAddress(*this),
4983 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4984 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4985 CopyOps[I]);
4986 }
4987 }
4988 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4989 if (IsInclusive) {
4990 EmitBlock(OMPScanExitBlock);
4991 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4992 }
4993 EmitBlock(OMPScanDispatch);
4994 if (!OMPFirstScanLoop) {
4995 // Emit red = buffer[i]; at the entrance to the scan phase.
4996 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4997 .getIterationVariable()
4998 ->IgnoreParenImpCasts();
4999 LValue IdxLVal = EmitLValue(IVExpr);
5000 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5001 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5002 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5003 if (!IsInclusive) {
5004 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5005 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5006 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5007 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5008 EmitBlock(ContBB);
5009 // Use idx - 1 iteration for exclusive scan.
5010 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5011 }
5012 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5013 const Expr *PrivateExpr = Privates[I];
5014 const Expr *OrigExpr = Shareds[I];
5015 const Expr *CopyArrayElem = CopyArrayElems[I];
5016 OpaqueValueMapping IdxMapping(
5017 *this,
5018 cast<OpaqueValueExpr>(
5019 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5020 RValue::get(IdxVal));
5021 LValue SrcLVal = EmitLValue(CopyArrayElem);
5022 LValue DestLVal = EmitLValue(OrigExpr);
5023 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5024 SrcLVal.getAddress(*this),
5025 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5026 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5027 CopyOps[I]);
5028 }
5029 if (!IsInclusive) {
5030 EmitBlock(ExclusiveExitBB);
5031 }
5032 }
5033 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5034 : OMPAfterScanBlock);
5035 EmitBlock(OMPAfterScanBlock);
5036 }
5037
5038 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5039 const CodeGenLoopTy &CodeGenLoop,
5040 Expr *IncExpr) {
5041 // Emit the loop iteration variable.
5042 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5043 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5044 EmitVarDecl(*IVDecl);
5045
5046 // Emit the iterations count variable.
5047 // If it is not a variable, Sema decided to calculate the iteration count on
5048 // each use (e.g., because it is foldable into a constant).
5049 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5050 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5051 // Emit calculation of the iterations count.
5052 EmitIgnoredExpr(S.getCalcLastIteration());
5053 }
5054
5055 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5056
5057 bool HasLastprivateClause = false;
5058 // Check pre-condition.
5059 {
5060 OMPLoopScope PreInitScope(*this, S);
5061 // Skip the entire loop if we don't meet the precondition.
5062 // If the condition constant folds and can be elided, avoid emitting the
5063 // whole loop.
5064 bool CondConstant;
5065 llvm::BasicBlock *ContBlock = nullptr;
5066 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5067 if (!CondConstant)
5068 return;
5069 } else {
5070 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5071 ContBlock = createBasicBlock("omp.precond.end");
5072 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5073 getProfileCount(&S));
5074 EmitBlock(ThenBlock);
5075 incrementProfileCounter(&S);
5076 }
5077
5078 emitAlignedClause(*this, S);
5079 // Emit 'then' code.
5080 {
5081 // Emit helper vars inits.
5082
5083 LValue LB = EmitOMPHelperVar(
5084 *this, cast<DeclRefExpr>(
5085 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5086 ? S.getCombinedLowerBoundVariable()
5087 : S.getLowerBoundVariable())));
5088 LValue UB = EmitOMPHelperVar(
5089 *this, cast<DeclRefExpr>(
5090 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5091 ? S.getCombinedUpperBoundVariable()
5092 : S.getUpperBoundVariable())));
5093 LValue ST =
5094 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5095 LValue IL =
5096 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5097
5098 OMPPrivateScope LoopScope(*this);
5099 if (EmitOMPFirstprivateClause(S, LoopScope)) {
5100 // Emit implicit barrier to synchronize threads and avoid data races
5101 // on initialization of firstprivate variables and post-update of
5102 // lastprivate variables.
5103 CGM.getOpenMPRuntime().emitBarrierCall(
5104 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5105 /*ForceSimpleCall=*/true);
5106 }
5107 EmitOMPPrivateClause(S, LoopScope);
5108 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5109 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5110 !isOpenMPTeamsDirective(S.getDirectiveKind()))
5111 EmitOMPReductionClauseInit(S, LoopScope);
5112 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5113 EmitOMPPrivateLoopCounters(S, LoopScope);
5114 (void)LoopScope.Privatize();
5115 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5116 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5117
5118 // Detect the distribute schedule kind and chunk.
5119 llvm::Value *Chunk = nullptr;
5120 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5121 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5122 ScheduleKind = C->getDistScheduleKind();
5123 if (const Expr *Ch = C->getChunkSize()) {
5124 Chunk = EmitScalarExpr(Ch);
5125 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5126 S.getIterationVariable()->getType(),
5127 S.getBeginLoc());
5128 }
5129 } else {
5130 // Default behaviour for dist_schedule clause.
5131 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5132 *this, S, ScheduleKind, Chunk);
5133 }
5134 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5135 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5136
5137 // OpenMP [2.10.8, distribute Construct, Description]
5138 // If dist_schedule is specified, kind must be static. If specified,
5139 // iterations are divided into chunks of size chunk_size, chunks are
5140 // assigned to the teams of the league in a round-robin fashion in the
5141 // order of the team number. When no chunk_size is specified, the
5142 // iteration space is divided into chunks that are approximately equal
5143 // in size, and at most one chunk is distributed to each team of the
5144 // league. The size of the chunks is unspecified in this case.
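// Illustrative only (hypothetical user code):
//   #pragma omp distribute dist_schedule(static, 4)
// hands out chunks of four iterations to the teams round-robin, while
// omitting the chunk size gives each team one roughly equal block.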
5145 bool StaticChunked = RT.isStaticChunked(
5146 ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5147 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5148 if (RT.isStaticNonchunked(ScheduleKind,
5149 /* Chunked */ Chunk != nullptr) ||
5150 StaticChunked) {
5151 CGOpenMPRuntime::StaticRTInput StaticInit(
5152 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
5153 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5154 StaticChunked ? Chunk : nullptr);
5155 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5156 StaticInit);
5157 JumpDest LoopExit =
5158 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5159 // UB = min(UB, GlobalUB);
5160 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5161 ? S.getCombinedEnsureUpperBound()
5162 : S.getEnsureUpperBound());
5163 // IV = LB;
5164 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5165 ? S.getCombinedInit()
5166 : S.getInit());
5167
5168 const Expr *Cond =
5169 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5170 ? S.getCombinedCond()
5171 : S.getCond();
5172
5173 if (StaticChunked)
5174 Cond = S.getCombinedDistCond();
5175
5176 // For static unchunked schedules generate:
5177 //
5178 // 1. For distribute alone, codegen
5179 // while (idx <= UB) {
5180 // BODY;
5181 // ++idx;
5182 // }
5183 //
5184 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5185 // while (idx <= UB) {
5186 // <CodeGen rest of pragma>(LB, UB);
5187 // idx += ST;
5188 // }
5189 //
5190 // For static chunked schedules generate:
5191 //
5192 // while (IV <= GlobalUB) {
5193 // <CodeGen rest of pragma>(LB, UB);
5194 // LB += ST;
5195 // UB += ST;
5196 // UB = min(UB, GlobalUB);
5197 // IV = LB;
5198 // }
5199 //
5200 emitCommonSimdLoop(
5201 *this, S,
5202 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5203 if (isOpenMPSimdDirective(S.getDirectiveKind()))
5204 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
5205 },
5206 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5207 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5208 CGF.EmitOMPInnerLoop(
5209 S, LoopScope.requiresCleanups(), Cond, IncExpr,
5210 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5211 CodeGenLoop(CGF, S, LoopExit);
5212 },
5213 [&S, StaticChunked](CodeGenFunction &CGF) {
5214 if (StaticChunked) {
5215 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5216 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5217 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5218 CGF.EmitIgnoredExpr(S.getCombinedInit());
5219 }
5220 });
5221 });
5222 EmitBlock(LoopExit.getBlock());
5223 // Tell the runtime we are done.
5224 RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5225 } else {
5226 // Emit the outer loop, which requests its work chunk [LB..UB] from
5227 // runtime and runs the inner loop to process it.
5228 const OMPLoopArguments LoopArguments = {
5229 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5230 IL.getAddress(*this), Chunk};
5231 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5232 CodeGenLoop);
5233 }
5234 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5235 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5236 return CGF.Builder.CreateIsNotNull(
5237 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5238 });
5239 }
5240 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5241 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5242 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5243 EmitOMPReductionClauseFinal(S, OMPD_simd);
5244 // Emit post-update of the reduction variables if IsLastIter != 0.
5245 emitPostUpdateForReductionClause(
5246 *this, S, [IL, &S](CodeGenFunction &CGF) {
5247 return CGF.Builder.CreateIsNotNull(
5248 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5249 });
5250 }
5251 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5252 if (HasLastprivateClause) {
5253 EmitOMPLastprivateClauseFinal(
5254 S, /*NoFinals=*/false,
5255 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5256 }
5257 }
5258
5259 // We're now done with the loop, so jump to the continuation block.
5260 if (ContBlock) {
5261 EmitBranch(ContBlock);
5262 EmitBlock(ContBlock, true);
5263 }
5264 }
5265 }
5266
5267 void CodeGenFunction::EmitOMPDistributeDirective(
5268 const OMPDistributeDirective &S) {
5269 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5270 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5271 };
5272 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5273 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5274 }
5275
5276 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5277 const CapturedStmt *S,
5278 SourceLocation Loc) {
5279 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5280 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5281 CGF.CapturedStmtInfo = &CapStmtInfo;
5282 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5283 Fn->setDoesNotRecurse();
5284 return Fn;
5285 }
5286
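// Illustrative forms handled by EmitOMPOrderedDirective below (source-level
// examples are assumptions, not normative):
//   #pragma omp ordered depend(source)        // doacross post, no body
//   #pragma omp ordered depend(sink : i - 1)  // doacross wait, no body
//   #pragma omp ordered simd                  // body outlined, called directly
//   #pragma omp ordered                       // body guarded by the runtime
// The depend forms have no associated statement and map to doacross runtime
// calls; the simd form bypasses the ordered runtime entry points.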
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement expected in 'ordered' construct with "
           "'depend' clause.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

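// Illustrative example (an assumption, not from the source being compiled):
//   #pragma omp atomic read acquire
//   v = x;
// The helper below emits an atomic load of 'x', the acquire flush required by
// OpenMP 2.17.7 for acquire/acq_rel/seq_cst orderings, and then a plain store
// of the loaded value into 'v'.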
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}

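// Illustrative example (an assumption):
//   #pragma omp atomic write release
//   x = expr;
// The helper below evaluates 'expr' non-atomically, stores the result into
// 'x' with an atomic store, and emits the release flush that OpenMP 2.17.7
// requires for release/acq_rel/seq_cst orderings.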
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

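// Illustrative mapping (a sketch of what the helper below produces when all
// of its preconditions hold; variable names are assumptions):
//   #pragma omp atomic update
//   x += expr;   // integer 'x', simple lvalue, matching operand widths
// becomes a single 'atomicrmw add'. The OpenMP 5.0 conditional forms such as
//   x = x > expr ? expr : x;   // i.e. x = min(x, expr)
// select one of 'atomicrmw min/max/umin/umax' depending on signedness and on
// which side of the comparison 'x' appears. If any precondition fails, the
// helper reports failure and the caller falls back to a compare-and-swap
// sequence.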
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

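// When emitOMPAtomicRMW declines, EmitOMPAtomicSimpleUpdateExpr above falls
// back to EmitAtomicUpdate, which produces roughly the following control flow
// (a sketch, not the exact IR):
//   old = atomic load of x
//   do {
//     new = CommonGen(old); // re-evaluates the update expression
//   } while (!atomic cmpxchg(x, old, new)); // 'old' refreshed on failure
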
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

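// Illustrative capture forms (source-level examples are assumptions):
//   #pragma omp atomic capture
//   v = x++;              // postfix: 'v' receives the old value of 'x'
//   v = ++x;              // prefix: 'v' receives the updated value
//   { v = x; x = expr; }  // plain overwrite: exchange path, old value to 'v'
// IsPostfixUpdate below selects whether the old or the new value of 'x' is
// stored into 'v'; the overwrite form (no update expression) is emitted as an
// atomic exchange.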
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::AcquireRelease);
    break;
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}

static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_sizes:
  case OMPC_allocator:
  case OMPC_allocate:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_acq_rel:
  case OMPC_acquire:
  case OMPC_release:
  case OMPC_relaxed:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_depobj:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_use_device_addr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
  case OMPC_device_type:
  case OMPC_match:
  case OMPC_nontemporal:
  case OMPC_order:
  case OMPC_destroy:
  case OMPC_detach:
  case OMPC_inclusive:
  case OMPC_exclusive:
  case OMPC_uses_allocators:
  case OMPC_affinity:
  case OMPC_init:
  case OMPC_inbranch:
  case OMPC_notinbranch:
  case OMPC_link:
  case OMPC_use:
  case OMPC_novariants:
  case OMPC_nocontext:
  case OMPC_filter:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

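// Illustrative clause handling (examples are assumptions): for
//   #pragma omp atomic capture seq_cst
// 'Kind' becomes OMPC_capture and 'AO' SequentiallyConsistent. When no
// ordering clause is given, the default comes from an active 'requires
// atomic_default_mem_order(...)'; an acq_rel default is kept for capture but
// split into acquire for reads and release for writes/updates.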
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find the first clause (skip seq_cst|acq_rel|acquire|release|relaxed
    // clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst &&
        C->getClauseKind() != OMPC_acq_rel &&
        C->getClauseKind() != OMPC_acquire &&
        C->getClauseKind() != OMPC_release &&
        C->getClauseKind() != OMPC_relaxed && C->getClauseKind() != OMPC_hint) {
      Kind = C->getClauseKind();
      break;
    }
  }
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                    S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
                    S.getBeginLoc());
}

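// Illustrative example (clause values are assumptions):
//   #pragma omp target if(n > 0) device(dev)
//   { ... }
// The helper below outlines the region, registers it as an offload entry
// unless the 'if' condition constant-folds to false or no offload targets are
// configured, and finally emits the runtime call that either launches the
// outlined function on the requested device or falls back to host execution.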
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope LoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}

static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

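// Illustrative example (clause values are assumptions):
//   #pragma omp teams num_teams(4) thread_limit(64)
//   { ... }
// The helper below outlines the teams region, forwards the num_teams and
// thread_limit values to the runtime before the teams call, and then emits
// the call that runs the outlined function in each team with the captured
// variables.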
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

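// Illustrative example (the condition is an assumption):
//   #pragma omp cancel parallel if(err != 0)
// requests cancellation of the innermost enclosing parallel region when the
// condition holds. With LangOpts.OpenMPIRBuilder enabled, the
// parallel/sections/section cases go through OMPBuilder.createCancel;
// otherwise the regular runtime cancel call is emitted.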
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}

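// Illustrative example (variable names are assumptions): inside
//   #pragma omp target data map(to: buf[0:n]) use_device_ptr(buf)
// references to 'buf' in the region must see the device address returned by
// the runtime. The method below privatizes each such variable and initializes
// the private copy from the matching entry in CaptureDeviceAddrMap.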
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD =
        cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
                                                         InitAddrIt, InitVD,
                                                         PvtVD]() {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);
      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission of
      // the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}

static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}

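// Illustrative example (names are assumptions): for
//   #pragma omp target data map(arr[0:n]) use_device_addr(arr[0:n])
// the base declaration 'arr' is recovered via getBaseDecl above, and the
// method below rebinds it to the device address provided by the runtime for
// the duration of the region.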
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    Address PrivAddr = InitAddrIt->getSecond();
    // For declrefs and variable length arrays we need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the
    // runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType())
      PrivAddr =
          EmitLoadOfPointer(PrivAddr, getContext()
                                          .getPointerType(OrigVD->getType())
                                          ->castAs<PointerType>());
    llvm::Type *RealTy =
        ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
            ->getPointerTo();
    PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);

    (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
  }
}

// Generate the instructions for '#pragma omp target data' directive.
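// An illustrative form of the construct (clause operands are hypothetical):
//   #pragma omp target data if(cond) device(dev) map(tofrom: a[0:n])
//   { ... }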
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr and use_device_addr
        // clauses.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inlined scope, because changes to references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

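// Generate the instructions for '#pragma omp target enter data' directive,
// e.g. (clause operands are illustrative):
//   #pragma omp target enter data if(cond) device(dev) map(to: a[0:n])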
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

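// Generate the instructions for '#pragma omp target exit data' directive,
// e.g. (clause operands are illustrative):
//   #pragma omp target exit data if(cond) device(dev) map(from: a[0:n])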
void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

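// Emit the region for '#pragma omp target parallel', e.g. (clauses are
// illustrative):
//   #pragma omp target parallel firstprivate(x) reduction(+ : sum)
//   { ... }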
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

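// Emit the region for '#pragma omp target parallel for', e.g. (loop is
// illustrative):
//   #pragma omp target parallel for
//   for (int i = 0; i < n; ++i) { ... }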
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive that consists of two implicit
  // directives: a 'parallel' directive with a nested 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

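// Emit the region for '#pragma omp target parallel for simd', e.g. (loop is
// illustrative):
//   #pragma omp target parallel for simd
//   for (int i = 0; i < n; ++i) { ... }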
static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive that consists of two implicit
  // directives: a 'parallel' directive with a nested 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a helper variable to the address of the corresponding captured
/// implicit parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

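// Emit codegen shared by all taskloop-based directives. An illustrative form
// of the base construct (clause operands are hypothetical):
//   #pragma omp taskloop grainsize(4) nogroup if(cond)
//   for (int i = 0; i < n; ++i) { ... }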
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()) &&
         "Expected taskloop-based directive.");
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

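// 'master taskloop' runs the taskloop region inside a master region, e.g.
// (loop is illustrative):
//   #pragma omp master taskloop
//   for (int i = 0; i < n; ++i) { ... }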
void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
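// An illustrative form (clause operands are hypothetical):
//   #pragma omp target update if(cond) device(dev) to(a[0:n]) from(b[0:n])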
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid a crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}