//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
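  // Illustrative sketch (hypothetical user code, not from this file): for a
  // directive such as
  //   #pragma omp parallel if(cond())
  // Sema can attach a pre-init statement declaring a captured temporary that
  // holds the result of cond(); the loop above emits those helper
  // declarations before any code for the construct itself is generated.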
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for OpenMP parallel construct that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};
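
// Note: for combined constructs such as 'target parallel' or 'target teams',
// the EmitPreInitStmt() predicates above return false; the pre-init
// statements are expected to be emitted once by the enclosing target scope
// instead, so the captured clause expressions are not emitted twice.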

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
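
// Illustrative sketch (hypothetical user code): for a C++ range-based loop
// under a loop directive, e.g.
//   #pragma omp for
//   for (int X : Vec) { ... }
// the doForAllLoops callback above pre-emits the loop's init statement and
// its implicit __range and __end variables, so that the bounds are available
// when the loop directive's precondition and iteration space are built.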

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}
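
// Illustrative sketch (hypothetical declaration): for a VLA such as
//   int A[n][m];
// the while loop above peels one variably-sized dimension per iteration, so
// the returned size is roughly
//   mul nuw (mul nuw %n, %m), sizeof(int)
// with the no-unsigned-wrap multiplies produced by CreateNUWMul.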

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}
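
// Illustrative sketch of the by-copy path above (hypothetical variable): a
// non-pointer scalar like 'int X' is passed through the runtime's
// pointer-sized argument slots roughly as
//   %x.casted = alloca uintptr
//   store i32 %x, <i32* view of %x.casted>  ; store via source-typed pointer
//   %arg = load uintptr, %x.casted          ; reload as a uintptr payload
// and the outlined function reverses the cast (see castValueFromUintptr).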

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}
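
// Illustrative example (hypothetical type): given a pointer to a VLA such as
// 'int (*)[n]', the recursion above strips the variably-sized array down to
// its element type, yielding the canonical parameter type 'int *', which no
// longer depends on the runtime bound 'n'.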

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
    F->addFnAttr(llvm::Attribute::AlwaysInline);

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
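
// Illustrative sketch of the wrapper scheme above (names follow the
// "_debug__" suffix logic in this function; exact helper names vary): with
// debug info enabled, codegen emits
//   void <helper>_debug__(<original parameter types>) { /* real body */ }
//   void <helper>(<uintptr-cast parameter types>) {
//     <helper>_debug__(<arguments cast back to their original types>);
//   }
// so the runtime keeps the pointer-sized signature it expects while the
// debug version preserves the source-level parameter types.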

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
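
// Illustrative sketch of the CFG emitted above (block names are the ones
// passed to createBasicBlock; the IR is simplified):
//   entry:
//     br (dest.begin == dest.end), %omp.arraycpy.done, %omp.arraycpy.body
//   omp.arraycpy.body:
//     %src = phi [ src.begin, %entry ], [ %src.next, %omp.arraycpy.body ]
//     %dst = phi [ dest.begin, %entry ], [ %dst.next, %omp.arraycpy.body ]
//     CopyGen(%dst, %src)
//     %dst.next = gep %dst, 1
//     %src.next = gep %src, 1
//     br (%dst.next == dest.end), %omp.arraycpy.done, %omp.arraycpy.body
//   omp.arraycpy.done: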

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
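
// For example (hypothetical types): copying 'int A[8]' takes the
// EmitAggregateAssign memcpy path above, because the copy expression is a
// plain built-in assignment; an array of a class with a user-defined
// copy-assignment operator is not a BinaryOperator assignment, so it goes
// through the element-by-element EmitOMPAggregateAssign path, invoking the
// operator once per element.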

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, e.g. omp for, omp simd, omp distribute, etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions
      // that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
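
// Illustrative sketch (hypothetical user code): for
//   int X = 1;
//   #pragma omp parallel firstprivate(X)
// the code above emits a private copy of X in the outlined region and
// copy-initializes it from the captured original. The return value reports
// whether some firstprivate is also lastprivate, so the caller knows a final
// copy-back is still required.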

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
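
// Illustrative sketch (hypothetical user code): for
//   #pragma omp parallel private(X)
// each private copy is emitted as a fresh local declaration via EmitDecl, so
// unlike firstprivate it is never copy-initialized from the original
// variable; subsequent references to X inside the region are remapped to the
// new local through the OMPPrivateScope.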

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
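
// Illustrative sketch of the guard emitted above (simplified): the copies are
// skipped on the master thread by comparing addresses, roughly
//   if ((uintptr)&master_var != (uintptr)&threadprivate_var) {
//     // copyin.not.master: perform the copies
//   }
//   // copyin.not.master.end
// because on the master thread both lvalues resolve to the same storage.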

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                               OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                      *this, OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1202
EmitOMPReductionClauseInit(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope,bool ForInscan)1203 void CodeGenFunction::EmitOMPReductionClauseInit(
1204 const OMPExecutableDirective &D,
1205 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1206 if (!HaveInsertPoint())
1207 return;
1208 SmallVector<const Expr *, 4> Shareds;
1209 SmallVector<const Expr *, 4> Privates;
1210 SmallVector<const Expr *, 4> ReductionOps;
1211 SmallVector<const Expr *, 4> LHSs;
1212 SmallVector<const Expr *, 4> RHSs;
1213 OMPTaskDataTy Data;
1214 SmallVector<const Expr *, 4> TaskLHSs;
1215 SmallVector<const Expr *, 4> TaskRHSs;
1216 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1217 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1218 continue;
1219 Shareds.append(C->varlist_begin(), C->varlist_end());
1220 Privates.append(C->privates().begin(), C->privates().end());
1221 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1222 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1223 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1224 if (C->getModifier() == OMPC_REDUCTION_task) {
1225 Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1226 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1227 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1228 Data.ReductionOps.append(C->reduction_ops().begin(),
1229 C->reduction_ops().end());
1230 TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1231 TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1232 }
1233 }
1234 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1235 unsigned Count = 0;
1236 auto *ILHS = LHSs.begin();
1237 auto *IRHS = RHSs.begin();
1238 auto *IPriv = Privates.begin();
1239 for (const Expr *IRef : Shareds) {
1240 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1241 // Emit private VarDecl with reduction init.
1242 RedCG.emitSharedOrigLValue(*this, Count);
1243 RedCG.emitAggregateType(*this, Count);
1244 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1245 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1246 RedCG.getSharedLValue(Count),
1247 [&Emission](CodeGenFunction &CGF) {
1248 CGF.EmitAutoVarInit(Emission);
1249 return true;
1250 });
1251 EmitAutoVarCleanups(Emission);
1252 Address BaseAddr = RedCG.adjustPrivateAddress(
1253 *this, Count, Emission.getAllocatedAddress());
1254 bool IsRegistered = PrivateScope.addPrivate(
1255 RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1256 assert(IsRegistered && "private var already registered as private");
1257 // Silence the warning about unused variable.
1258 (void)IsRegistered;
1259
1260 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1261 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1262 QualType Type = PrivateVD->getType();
1263 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1264 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1265 // Store the address of the original variable associated with the LHS
1266 // implicit variable.
1267 PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1268 return RedCG.getSharedLValue(Count).getAddress(*this);
1269 });
1270 PrivateScope.addPrivate(
1271 RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1272 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1273 isa<ArraySubscriptExpr>(IRef)) {
1274 // Store the address of the original variable associated with the LHS
1275 // implicit variable.
1276 PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1277 return RedCG.getSharedLValue(Count).getAddress(*this);
1278 });
1279 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1280 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1281 ConvertTypeForMem(RHSVD->getType()),
1282 "rhs.begin");
1283 });
1284 } else {
1285 QualType Type = PrivateVD->getType();
1286 bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1287 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1288 // Store the address of the original variable associated with the LHS
1289 // implicit variable.
1290 if (IsArray) {
1291 OriginalAddr = Builder.CreateElementBitCast(
1292 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1293 }
1294 PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1295 PrivateScope.addPrivate(
1296 RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1297 return IsArray
1298 ? Builder.CreateElementBitCast(
1299 GetAddrOfLocalVar(PrivateVD),
1300 ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1301 : GetAddrOfLocalVar(PrivateVD);
1302 });
1303 }
1304 ++ILHS;
1305 ++IRHS;
1306 ++IPriv;
1307 ++Count;
1308 }
1309 if (!Data.ReductionVars.empty()) {
1310 Data.IsReductionWithTaskMod = true;
1311 Data.IsWorksharingReduction =
1312 isOpenMPWorksharingDirective(D.getDirectiveKind());
1313 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1314 *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1315 const Expr *TaskRedRef = nullptr;
1316 switch (D.getDirectiveKind()) {
1317 case OMPD_parallel:
1318 TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1319 break;
1320 case OMPD_for:
1321 TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1322 break;
1323 case OMPD_sections:
1324 TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1325 break;
1326 case OMPD_parallel_for:
1327 TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1328 break;
1329 case OMPD_parallel_master:
1330 TaskRedRef =
1331 cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1332 break;
1333 case OMPD_parallel_sections:
1334 TaskRedRef =
1335 cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1336 break;
1337 case OMPD_target_parallel:
1338 TaskRedRef =
1339 cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1340 break;
1341 case OMPD_target_parallel_for:
1342 TaskRedRef =
1343 cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1344 break;
1345 case OMPD_distribute_parallel_for:
1346 TaskRedRef =
1347 cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1348 break;
1349 case OMPD_teams_distribute_parallel_for:
1350 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1351 .getTaskReductionRefExpr();
1352 break;
1353 case OMPD_target_teams_distribute_parallel_for:
1354 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1355 .getTaskReductionRefExpr();
1356 break;
1357 case OMPD_simd:
1358 case OMPD_for_simd:
1359 case OMPD_section:
1360 case OMPD_single:
1361 case OMPD_master:
1362 case OMPD_critical:
1363 case OMPD_parallel_for_simd:
1364 case OMPD_task:
1365 case OMPD_taskyield:
1366 case OMPD_barrier:
1367 case OMPD_taskwait:
1368 case OMPD_taskgroup:
1369 case OMPD_flush:
1370 case OMPD_depobj:
1371 case OMPD_scan:
1372 case OMPD_ordered:
1373 case OMPD_atomic:
1374 case OMPD_teams:
1375 case OMPD_target:
1376 case OMPD_cancellation_point:
1377 case OMPD_cancel:
1378 case OMPD_target_data:
1379 case OMPD_target_enter_data:
1380 case OMPD_target_exit_data:
1381 case OMPD_taskloop:
1382 case OMPD_taskloop_simd:
1383 case OMPD_master_taskloop:
1384 case OMPD_master_taskloop_simd:
1385 case OMPD_parallel_master_taskloop:
1386 case OMPD_parallel_master_taskloop_simd:
1387 case OMPD_distribute:
1388 case OMPD_target_update:
1389 case OMPD_distribute_parallel_for_simd:
1390 case OMPD_distribute_simd:
1391 case OMPD_target_parallel_for_simd:
1392 case OMPD_target_simd:
1393 case OMPD_teams_distribute:
1394 case OMPD_teams_distribute_simd:
1395 case OMPD_teams_distribute_parallel_for_simd:
1396 case OMPD_target_teams:
1397 case OMPD_target_teams_distribute:
1398 case OMPD_target_teams_distribute_parallel_for_simd:
1399 case OMPD_target_teams_distribute_simd:
1400 case OMPD_declare_target:
1401 case OMPD_end_declare_target:
1402 case OMPD_threadprivate:
1403 case OMPD_allocate:
1404 case OMPD_declare_reduction:
1405 case OMPD_declare_mapper:
1406 case OMPD_declare_simd:
1407 case OMPD_requires:
1408 case OMPD_declare_variant:
1409 case OMPD_begin_declare_variant:
1410 case OMPD_end_declare_variant:
1411 case OMPD_unknown:
1412 default:
1413 llvm_unreachable("Unexpected directive with task reductions.");
1414 }
1415
1416 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1417 EmitVarDecl(*VD);
1418 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1419 /*Volatile=*/false, TaskRedRef->getType());
1420 }
1421 }
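// Illustrative usage (not from the original source): a directive that reaches
// the task-reduction path above is, e.g.,
//   #pragma omp parallel for reduction(task, +: sum)
// where the 'task' modifier triggers emission of the task-reduction
// descriptor, which is then stored into the compiler-generated reference
// variable via the EmitStoreOfScalar above.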
1422
1423 void CodeGenFunction::EmitOMPReductionClauseFinal(
1424 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1425 if (!HaveInsertPoint())
1426 return;
1427 llvm::SmallVector<const Expr *, 8> Privates;
1428 llvm::SmallVector<const Expr *, 8> LHSExprs;
1429 llvm::SmallVector<const Expr *, 8> RHSExprs;
1430 llvm::SmallVector<const Expr *, 8> ReductionOps;
1431 bool HasAtLeastOneReduction = false;
1432 bool IsReductionWithTaskMod = false;
1433 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1434 // Do not emit for inscan reductions.
1435 if (C->getModifier() == OMPC_REDUCTION_inscan)
1436 continue;
1437 HasAtLeastOneReduction = true;
1438 Privates.append(C->privates().begin(), C->privates().end());
1439 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1440 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1441 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1442 IsReductionWithTaskMod =
1443 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1444 }
1445 if (HasAtLeastOneReduction) {
1446 if (IsReductionWithTaskMod) {
1447 CGM.getOpenMPRuntime().emitTaskReductionFini(
1448 *this, D.getBeginLoc(),
1449 isOpenMPWorksharingDirective(D.getDirectiveKind()));
1450 }
1451 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1452 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1453 ReductionKind == OMPD_simd;
1454 bool SimpleReduction = ReductionKind == OMPD_simd;
1455 // Emit a nowait reduction if the nowait clause is present or the directive
1456 // is a parallel directive (it always has an implicit barrier).
1457 CGM.getOpenMPRuntime().emitReduction(
1458 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1459 {WithNowait, SimpleReduction, ReductionKind});
1460 }
1461 }
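// For example (illustrative), '#pragma omp for nowait reduction(+: x)' takes
// the WithNowait path above, while a plain '#pragma omp for reduction(+: x)'
// emits the reduction that relies on the implicit barrier at the end of the
// worksharing region.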
1462
1463 static void emitPostUpdateForReductionClause(
1464 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1465 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1466 if (!CGF.HaveInsertPoint())
1467 return;
1468 llvm::BasicBlock *DoneBB = nullptr;
1469 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1470 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1471 if (!DoneBB) {
1472 if (llvm::Value *Cond = CondGen(CGF)) {
1473 // When the first post-update expression is found, emit the conditional
1474 // block if one was requested.
1475 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1476 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1477 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1478 CGF.EmitBlock(ThenBB);
1479 }
1480 }
1481 CGF.EmitIgnoredExpr(PostUpdate);
1482 }
1483 }
1484 if (DoneBB)
1485 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1486 }
1487
1488 namespace {
1489 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1490 /// parallel function. This is necessary for combined constructs such as
1491 /// 'distribute parallel for'
1492 typedef llvm::function_ref<void(CodeGenFunction &,
1493 const OMPExecutableDirective &,
1494 llvm::SmallVectorImpl<llvm::Value *> &)>
1495 CodeGenBoundParametersTy;
1496 } // anonymous namespace
1497
1498 static void
1499 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1500 const OMPExecutableDirective &S) {
1501 if (CGF.getLangOpts().OpenMP < 50)
1502 return;
1503 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1504 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1505 for (const Expr *Ref : C->varlists()) {
1506 if (!Ref->getType()->isScalarType())
1507 continue;
1508 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1509 if (!DRE)
1510 continue;
1511 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1512 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1513 }
1514 }
1515 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1516 for (const Expr *Ref : C->varlists()) {
1517 if (!Ref->getType()->isScalarType())
1518 continue;
1519 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1520 if (!DRE)
1521 continue;
1522 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1523 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1524 }
1525 }
1526 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1527 for (const Expr *Ref : C->varlists()) {
1528 if (!Ref->getType()->isScalarType())
1529 continue;
1530 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1531 if (!DRE)
1532 continue;
1533 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1534 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1535 }
1536 }
1537 // Privates need not be analyzed since they are not captured at all.
1538 // Task reductions may be skipped - tasks are ignored.
1539 // Firstprivates do not return a value but may be passed by reference - no
1540 // need to check for an updated lastprivate conditional.
1541 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1542 for (const Expr *Ref : C->varlists()) {
1543 if (!Ref->getType()->isScalarType())
1544 continue;
1545 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1546 if (!DRE)
1547 continue;
1548 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1549 }
1550 }
1551 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1552 CGF, S, PrivateDecls);
1553 }
1554
1555 static void emitCommonOMPParallelDirective(
1556 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1557 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1558 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1559 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1560 llvm::Function *OutlinedFn =
1561 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1562 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1563 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1564 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1565 llvm::Value *NumThreads =
1566 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1567 /*IgnoreResultAssign=*/true);
1568 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1569 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1570 }
1571 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1572 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1573 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1574 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1575 }
1576 const Expr *IfCond = nullptr;
1577 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1578 if (C->getNameModifier() == OMPD_unknown ||
1579 C->getNameModifier() == OMPD_parallel) {
1580 IfCond = C->getCondition();
1581 break;
1582 }
1583 }
1584
1585 OMPParallelScope Scope(CGF, S);
1586 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1587 // Combining 'distribute' with 'for' requires sharing each 'distribute'
1588 // chunk's lower and upper bounds with the 'for' chunking mechanism.
1589 // The following lambda takes care of appending the lower and upper bound
1590 // parameters when necessary.
1591 CodeGenBoundParameters(CGF, S, CapturedVars);
1592 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1593 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1594 CapturedVars, IfCond);
1595 }
1596
1597 static bool isAllocatableDecl(const VarDecl *VD) {
1598 const VarDecl *CVD = VD->getCanonicalDecl();
1599 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1600 return false;
1601 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1602 // Use the default allocation when the default/null allocator is used without an explicit allocator expression.
1603 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1604 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1605 !AA->getAllocator());
1606 }
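// Illustrative example of an "allocatable" decl in the sense above (assumes
// the predefined OpenMP allocator omp_pteam_mem_alloc):
//   int a;
//   #pragma omp allocate(a) allocator(omp_pteam_mem_alloc)
// With no allocate directive, or with the default allocator and no allocator
// expression, this returns false and the default allocation is used.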
1607
1608 static void emitEmptyBoundParameters(CodeGenFunction &,
1609 const OMPExecutableDirective &,
1610 llvm::SmallVectorImpl<llvm::Value *> &) {}
1611
1612 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1613 CodeGenFunction &CGF, const VarDecl *VD) {
1614 CodeGenModule &CGM = CGF.CGM;
1615 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1616
1617 if (!VD)
1618 return Address::invalid();
1619 const VarDecl *CVD = VD->getCanonicalDecl();
1620 if (!isAllocatableDecl(CVD))
1621 return Address::invalid();
1622 llvm::Value *Size;
1623 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1624 if (CVD->getType()->isVariablyModifiedType()) {
1625 Size = CGF.getTypeSize(CVD->getType());
1626 // Align the size: ((size + align - 1) / align) * align
1627 Size = CGF.Builder.CreateNUWAdd(
1628 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1629 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1630 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1631 } else {
1632 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1633 Size = CGM.getSize(Sz.alignTo(Align));
1634 }
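// Worked example for the align-up above (illustrative numbers): with
// size = 10 and align = 8, ((10 + 7) / 8) * 8 == 16, i.e. the size is
// rounded up to the next multiple of the alignment.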
1635
1636 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1637 assert(AA->getAllocator() &&
1638 "Expected allocator expression for non-default allocator.");
1639 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1640 // According to the standard, the original allocator type is an enum
1641 // (integer). Convert to a pointer type, if required.
1642 if (Allocator->getType()->isIntegerTy())
1643 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1644 else if (Allocator->getType()->isPointerTy())
1645 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1646 CGM.VoidPtrTy);
1647
1648 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1649 CGF.Builder, Size, Allocator,
1650 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1651 llvm::CallInst *FreeCI =
1652 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1653
1654 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1655 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1656 Addr,
1657 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1658 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1659 return Address(Addr, Align);
1660 }
1661
1662 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1663 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1664 SourceLocation Loc) {
1665 CodeGenModule &CGM = CGF.CGM;
1666 if (CGM.getLangOpts().OpenMPUseTLS &&
1667 CGM.getContext().getTargetInfo().isTLSSupported())
1668 return VDAddr;
1669
1670 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1671
1672 llvm::Type *VarTy = VDAddr.getElementType();
1673 llvm::Value *Data =
1674 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1675 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1676 std::string Suffix = getNameWithSeparators({"cache", ""});
1677 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1678
1679 llvm::CallInst *ThreadPrivateCacheCall =
1680 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1681
1682 return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1683 }
1684
1685 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1686 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1687 SmallString<128> Buffer;
1688 llvm::raw_svector_ostream OS(Buffer);
1689 StringRef Sep = FirstSeparator;
1690 for (StringRef Part : Parts) {
1691 OS << Sep << Part;
1692 Sep = Separator;
1693 }
1694 return OS.str().str();
1695 }
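// e.g. getNameWithSeparators({"a", "b"}, "_", ".") yields "_a.b": the first
// separator is prepended before the first part, and the second separator
// before every following part.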
1696 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1697 if (CGM.getLangOpts().OpenMPIRBuilder) {
1698 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1699 // Check if we have an 'if' clause associated with the directive.
1700 llvm::Value *IfCond = nullptr;
1701 if (const auto *C = S.getSingleClause<OMPIfClause>())
1702 IfCond = EmitScalarExpr(C->getCondition(),
1703 /*IgnoreResultAssign=*/true);
1704
1705 llvm::Value *NumThreads = nullptr;
1706 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1707 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1708 /*IgnoreResultAssign=*/true);
1709
1710 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1711 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1712 ProcBind = ProcBindClause->getProcBindKind();
1713
1714 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1715
1716 // The cleanup callback that finalizes all variables at the given location,
1717 // and thus calls destructors etc.
1718 auto FiniCB = [this](InsertPointTy IP) {
1719 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1720 };
1721
1722 // Privatization callback that performs appropriate action for
1723 // shared/private/firstprivate/lastprivate/copyin/... variables.
1724 //
1725 // TODO: This defaults to shared right now.
1726 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1727 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1728 // The next line is appropriate only for variables (Val) with the
1729 // data-sharing attribute "shared".
1730 ReplVal = &Val;
1731
1732 return CodeGenIP;
1733 };
1734
1735 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1736 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1737
1738 auto BodyGenCB = [ParallelRegionBodyStmt,
1739 this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1740 llvm::BasicBlock &ContinuationBB) {
1741 OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1742 ContinuationBB);
1743 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1744 CodeGenIP, ContinuationBB);
1745 };
1746
1747 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1748 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1749 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1750 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1751 Builder.restoreIP(
1752 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1753 IfCond, NumThreads, ProcBind, S.hasCancel()));
1754 return;
1755 }
1756
1757 // Emit parallel region as a standalone region.
1758 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1759 Action.Enter(CGF);
1760 OMPPrivateScope PrivateScope(CGF);
1761 bool Copyins = CGF.EmitOMPCopyinClause(S);
1762 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1763 if (Copyins) {
1764 // Emit an implicit barrier to synchronize threads and avoid data races when
1765 // propagating the master thread's values of threadprivate variables to the
1766 // local instances of those variables in all other implicit threads.
1767 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1768 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1769 /*ForceSimpleCall=*/true);
1770 }
1771 CGF.EmitOMPPrivateClause(S, PrivateScope);
1772 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1773 (void)PrivateScope.Privatize();
1774 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1775 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1776 };
1777 {
1778 auto LPCRegion =
1779 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1780 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1781 emitEmptyBoundParameters);
1782 emitPostUpdateForReductionClause(*this, S,
1783 [](CodeGenFunction &) { return nullptr; });
1784 }
1785 // Check for outer lastprivate conditional update.
1786 checkForLastprivateConditionalUpdate(*this, S);
1787 }
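// Illustrative input handled by EmitOMPParallelDirective:
//   #pragma omp parallel num_threads(4) firstprivate(a) reduction(+: s)
//   { /* body */ }
// With the OpenMPIRBuilder enabled (cc1 -fopenmp-enable-irbuilder) the
// IRBuilder path above is used; otherwise the region is outlined via
// emitCommonOMPParallelDirective.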
1788
1789 namespace {
1790 /// RAII to handle scopes for loop transformation directives.
1791 class OMPTransformDirectiveScopeRAII {
1792 OMPLoopScope *Scope = nullptr;
1793 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1794 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1795
1796 public:
1797 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1798 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1799 Scope = new OMPLoopScope(CGF, *Dir);
1800 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1801 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1802 }
1803 }
1804 ~OMPTransformDirectiveScopeRAII() {
1805 if (!Scope)
1806 return;
1807 delete CapInfoRAII;
1808 delete CGSI;
1809 delete Scope;
1810 }
1811 };
1812 } // namespace
1813
1814 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1815 int MaxLevel, int Level = 0) {
1816 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1817 const Stmt *SimplifiedS = S->IgnoreContainers();
1818 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1819 PrettyStackTraceLoc CrashInfo(
1820 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1821 "LLVM IR generation of compound statement ('{}')");
1822
1823 // Keep track of the current cleanup stack depth, including debug scopes.
1824 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1825 for (const Stmt *CurStmt : CS->body())
1826 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1827 return;
1828 }
1829 if (SimplifiedS == NextLoop) {
1830 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
1831 SimplifiedS = Dir->getTransformedStmt();
1832 if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
1833 SimplifiedS = Dir->getTransformedStmt();
1834 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1835 SimplifiedS = CanonLoop->getLoopStmt();
1836 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1837 S = For->getBody();
1838 } else {
1839 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1840 "Expected canonical for loop or range-based for loop.");
1841 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1842 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1843 S = CXXFor->getBody();
1844 }
1845 if (Level + 1 < MaxLevel) {
1846 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1847 S, /*TryImperfectlyNestedLoops=*/true);
1848 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1849 return;
1850 }
1851 }
1852 CGF.EmitStmt(S);
1853 }
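// Illustrative nest walked by emitBody for 'collapse(2)' (MaxLevel == 2):
//   for (int i = 0; i < N; ++i) {   // level 0
//     S0;                           // intervening statement, emitted as-is
//     for (int j = 0; j < M; ++j)   // level 1, found via NextLoop
//       Body;                       // emitted by the final EmitStmt(S)
//   }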
1854
1855 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1856 JumpDest LoopExit) {
1857 RunCleanupsScope BodyScope(*this);
1858 // Update counters values on current iteration.
1859 for (const Expr *UE : D.updates())
1860 EmitIgnoredExpr(UE);
1861 // Update the linear variables.
1862 // In distribute directives only loop counters may be marked as linear, so
1863 // there is no need to generate code for them.
1864 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1865 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1866 for (const Expr *UE : C->updates())
1867 EmitIgnoredExpr(UE);
1868 }
1869 }
1870
1871 // On a continue in the body, jump to the end.
1872 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1873 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1874 for (const Expr *E : D.finals_conditions()) {
1875 if (!E)
1876 continue;
1877 // Check that the loop counter in a non-rectangular nest fits into the
1878 // iteration space.
1879 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1880 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1881 getProfileCount(D.getBody()));
1882 EmitBlock(NextBB);
1883 }
1884
1885 OMPPrivateScope InscanScope(*this);
1886 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1887 bool IsInscanRegion = InscanScope.Privatize();
1888 if (IsInscanRegion) {
1889 // We need to remember the blocks before and after the scan directive
1890 // to dispatch them correctly depending on the clause used in
1891 // this directive, inclusive or exclusive. For an inclusive scan the natural
1892 // order of the blocks is used; for the exclusive clause the blocks must be
1893 // executed in reverse order.
1894 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1895 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1896 // No need to allocate the inscan exit block; in simd mode it is selected in
1897 // the codegen for the scan directive.
1898 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1899 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1900 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1901 EmitBranch(OMPScanDispatch);
1902 EmitBlock(OMPBeforeScanBlock);
1903 }
1904
1905 // Emit loop variables for C++ range loops.
1906 const Stmt *Body =
1907 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1908 // Emit loop body.
1909 emitBody(*this, Body,
1910 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1911 Body, /*TryImperfectlyNestedLoops=*/true),
1912 D.getLoopsNumber());
1913
1914 // Jump to the dispatcher at the end of the loop body.
1915 if (IsInscanRegion)
1916 EmitBranch(OMPScanExitBlock);
1917
1918 // The end (updates/cleanups).
1919 EmitBlock(Continue.getBlock());
1920 BreakContinueStack.pop_back();
1921 }
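// Block layout produced for an inscan reduction body (sketch; the names match
// the blocks created above, and the dispatch order depends on whether the
// scan clause is inclusive or exclusive):
//   omp.inscan.dispatch -> omp.before.scan.bb / omp.after.scan.bb
//   ... loop body ... -> omp.exit.inscan.bb (non-simd only) -> omp.body.continue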
1922
1923 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1924
1925 /// Emit a captured statement and return the function as well as its captured
1926 /// closure context.
1927 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1928 const CapturedStmt *S) {
1929 LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1930 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1931 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1932 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1933 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1934 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1935
1936 return {F, CapStruct.getPointer(ParentCGF)};
1937 }
1938
1939 /// Emit a call to a previously captured closure.
1940 static llvm::CallInst *
1941 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1942 llvm::ArrayRef<llvm::Value *> Args) {
1943 // Append the closure context to the argument list.
1944 SmallVector<llvm::Value *> EffectiveArgs;
1945 EffectiveArgs.reserve(Args.size() + 1);
1946 llvm::append_range(EffectiveArgs, Args);
1947 EffectiveArgs.push_back(Cap.second);
1948
1949 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1950 }
1951
1952 llvm::CanonicalLoopInfo *
1953 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1954 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1955
1956 EmitStmt(S);
1957 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1958
1959 // The last added loop is the outermost one.
1960 return OMPLoopNestStack.back();
1961 }
1962
1963 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
1964 const Stmt *SyntacticalLoop = S->getLoopStmt();
1965 if (!getLangOpts().OpenMPIRBuilder) {
1966 // Emit the loop as a regular statement if the OpenMPIRBuilder is not enabled.
1967 EmitStmt(SyntacticalLoop);
1968 return;
1969 }
1970
1971 LexicalScope ForScope(*this, S->getSourceRange());
1972
1973 // Emit init statements. The Distance/LoopVar funcs may reference variable
1974 // declarations they contain.
1975 const Stmt *BodyStmt;
1976 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
1977 if (const Stmt *InitStmt = For->getInit())
1978 EmitStmt(InitStmt);
1979 BodyStmt = For->getBody();
1980 } else if (const auto *RangeFor =
1981 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
1982 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
1983 EmitStmt(RangeStmt);
1984 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
1985 EmitStmt(BeginStmt);
1986 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
1987 EmitStmt(EndStmt);
1988 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
1989 EmitStmt(LoopVarStmt);
1990 BodyStmt = RangeFor->getBody();
1991 } else
1992 llvm_unreachable("Expected for-stmt or range-based for-stmt");
1993
1994 // Emit closure for later use. By-value captures will be captured here.
1995 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
1996 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
1997 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
1998 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
1999
2000 // Call the distance function to get the number of iterations of the loop to
2001 // come.
2002 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2003 ->getParam(0)
2004 ->getType()
2005 .getNonReferenceType();
2006 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2007 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2008 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2009
2010 // Emit the loop structure.
2011 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2012 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2013 llvm::Value *IndVar) {
2014 Builder.restoreIP(CodeGenIP);
2015
2016 // Emit the loop body: Convert the logical iteration number to the loop
2017 // variable and emit the body.
2018 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2019 LValue LCVal = EmitLValue(LoopVarRef);
2020 Address LoopVarAddress = LCVal.getAddress(*this);
2021 emitCapturedStmtCall(*this, LoopVarClosure,
2022 {LoopVarAddress.getPointer(), IndVar});
2023
2024 RunCleanupsScope BodyScope(*this);
2025 EmitStmt(BodyStmt);
2026 };
2027 llvm::CanonicalLoopInfo *CL =
2028 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2029
2030 // Finish up the loop.
2031 Builder.restoreIP(CL->getAfterIP());
2032 ForScope.ForceCleanup();
2033
2034 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2035 OMPLoopNestStack.push_back(CL);
2036 }
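// Conceptual shape of the emitted code (sketch):
//   %count = <DistanceFunc>(captures)       ; logical iteration count
//   canonical loop over %iv in [0, %count):
//     <LoopVarFunc>(&loop_var, %iv)         ; map logical IV to the loop var
//     <body>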
2037
2038 void CodeGenFunction::EmitOMPInnerLoop(
2039 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2040 const Expr *IncExpr,
2041 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2042 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2043 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2044
2045 // Start the loop with a block that tests the condition.
2046 auto CondBlock = createBasicBlock("omp.inner.for.cond");
2047 EmitBlock(CondBlock);
2048 const SourceRange R = S.getSourceRange();
2049
2050 // If attributes are attached to the statement, push the basic block along with them.
2051 const auto &OMPED = cast<OMPExecutableDirective>(S);
2052 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2053 const Stmt *SS = ICS->getCapturedStmt();
2054 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2055 OMPLoopNestStack.clear();
2056 if (AS)
2057 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2058 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2059 SourceLocToDebugLoc(R.getEnd()));
2060 else
2061 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2062 SourceLocToDebugLoc(R.getEnd()));
2063
2064 // If there are any cleanups between here and the loop-exit scope,
2065 // create a block to stage a loop exit along.
2066 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2067 if (RequiresCleanup)
2068 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2069
2070 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2071
2072 // Emit condition.
2073 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2074 if (ExitBlock != LoopExit.getBlock()) {
2075 EmitBlock(ExitBlock);
2076 EmitBranchThroughCleanup(LoopExit);
2077 }
2078
2079 EmitBlock(LoopBody);
2080 incrementProfileCounter(&S);
2081
2082 // Create a block for the increment.
2083 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2084 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2085
2086 BodyGen(*this);
2087
2088 // Emit "IV = IV + 1" and a back-edge to the condition block.
2089 EmitBlock(Continue.getBlock());
2090 EmitIgnoredExpr(IncExpr);
2091 PostIncGen(*this);
2092 BreakContinueStack.pop_back();
2093 EmitBranch(CondBlock);
2094 LoopStack.pop();
2095 // Emit the fall-through block.
2096 EmitBlock(LoopExit.getBlock());
2097 }
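// Resulting CFG (sketch; the cleanup block appears only if RequiresCleanup):
//   omp.inner.for.cond --(true)--> omp.inner.for.body -> omp.inner.for.inc
//          ^                                                    |
//          +----------------------------------------------------+
//   omp.inner.for.cond --(false)--> [omp.inner.for.cond.cleanup ->]
//                                   omp.inner.for.end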
2098
2099 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2100 if (!HaveInsertPoint())
2101 return false;
2102 // Emit inits for the linear variables.
2103 bool HasLinears = false;
2104 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2105 for (const Expr *Init : C->inits()) {
2106 HasLinears = true;
2107 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2108 if (const auto *Ref =
2109 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2110 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2111 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2112 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2113 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2114 VD->getInit()->getType(), VK_LValue,
2115 VD->getInit()->getExprLoc());
2116 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
2117 VD->getType()),
2118 /*capturedByInit=*/false);
2119 EmitAutoVarCleanups(Emission);
2120 } else {
2121 EmitVarDecl(*VD);
2122 }
2123 }
2124 // Emit the linear steps for the linear clauses.
2125 // If a step is not constant, it is pre-calculated before the loop.
2126 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2127 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2128 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2129 // Emit calculation of the linear step.
2130 EmitIgnoredExpr(CS);
2131 }
2132 }
2133 return HasLinears;
2134 }
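// e.g. for '#pragma omp simd linear(x: s)' (illustrative) this emits the
// private copy of 'x' with its initializer and, when the step 's' is not a
// compile-time constant, a helper variable holding the pre-computed step.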
2135
2136 void CodeGenFunction::EmitOMPLinearClauseFinal(
2137 const OMPLoopDirective &D,
2138 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2139 if (!HaveInsertPoint())
2140 return;
2141 llvm::BasicBlock *DoneBB = nullptr;
2142 // Emit the final values of the linear variables.
2143 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2144 auto IC = C->varlist_begin();
2145 for (const Expr *F : C->finals()) {
2146 if (!DoneBB) {
2147 if (llvm::Value *Cond = CondGen(*this)) {
2148 // When the first post-update expression is found, emit the conditional
2149 // block if one was requested.
2150 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2151 DoneBB = createBasicBlock(".omp.linear.pu.done");
2152 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2153 EmitBlock(ThenBB);
2154 }
2155 }
2156 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2157 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2158 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2159 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2160 Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2161 CodeGenFunction::OMPPrivateScope VarScope(*this);
2162 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2163 (void)VarScope.Privatize();
2164 EmitIgnoredExpr(F);
2165 ++IC;
2166 }
2167 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2168 EmitIgnoredExpr(PostUpdate);
2169 }
2170 if (DoneBB)
2171 EmitBlock(DoneBB, /*IsFinished=*/true);
2172 }
2173
2174 static void emitAlignedClause(CodeGenFunction &CGF,
2175 const OMPExecutableDirective &D) {
2176 if (!CGF.HaveInsertPoint())
2177 return;
2178 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2179 llvm::APInt ClauseAlignment(64, 0);
2180 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2181 auto *AlignmentCI =
2182 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2183 ClauseAlignment = AlignmentCI->getValue();
2184 }
2185 for (const Expr *E : Clause->varlists()) {
2186 llvm::APInt Alignment(ClauseAlignment);
2187 if (Alignment == 0) {
2188 // OpenMP [2.8.1, Description]
2189 // If no optional parameter is specified, implementation-defined default
2190 // alignments for SIMD instructions on the target platforms are assumed.
2191 Alignment =
2192 CGF.getContext()
2193 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2194 E->getType()->getPointeeType()))
2195 .getQuantity();
2196 }
2197 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2198 "alignment is not power of 2");
2199 if (Alignment != 0) {
2200 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2201 CGF.emitAlignmentAssumption(
2202 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2203 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2204 }
2205 }
2206 }
2207 }
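// e.g. '#pragma omp simd aligned(p: 64)' (illustrative) emits an alignment
// assumption stating that 'p' points to 64-byte aligned memory; with no
// explicit alignment, the target's default SIMD alignment is assumed.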
2208
2209 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2210 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2211 if (!HaveInsertPoint())
2212 return;
2213 auto I = S.private_counters().begin();
2214 for (const Expr *E : S.counters()) {
2215 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2216 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2217 // Emit var without initialization.
2218 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2219 EmitAutoVarCleanups(VarEmission);
2220 LocalDeclMap.erase(PrivateVD);
2221 (void)LoopScope.addPrivate(VD, [&VarEmission]() {
2222 return VarEmission.getAllocatedAddress();
2223 });
2224 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2225 VD->hasGlobalStorage()) {
2226 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2227 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2228 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2229 E->getType(), VK_LValue, E->getExprLoc());
2230 return EmitLValue(&DRE).getAddress(*this);
2231 });
2232 } else {
2233 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2234 return VarEmission.getAllocatedAddress();
2235 });
2236 }
2237 ++I;
2238 }
2239 // Privatize extra loop counters used in loops for ordered(n) clauses.
2240 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2241 if (!C->getNumForLoops())
2242 continue;
2243 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2244 I < E; ++I) {
2245 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2246 const auto *VD = cast<VarDecl>(DRE->getDecl());
2247 // Override only those variables that can be captured to avoid re-emission
2248 // of the variables declared within the loops.
2249 if (DRE->refersToEnclosingVariableOrCapture()) {
2250 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2251 return CreateMemTemp(DRE->getType(), VD->getName());
2252 });
2253 }
2254 }
2255 }
2256 }
2257
2258 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2259 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2260 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2261 if (!CGF.HaveInsertPoint())
2262 return;
2263 {
2264 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2265 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2266 (void)PreCondScope.Privatize();
2267 // Get initial values of real counters.
2268 for (const Expr *I : S.inits()) {
2269 CGF.EmitIgnoredExpr(I);
2270 }
2271 }
2272 // Create temp loop control variables with their init values to support
2273 // non-rectangular loops.
2274 CodeGenFunction::OMPMapVars PreCondVars;
2275 for (const Expr *E : S.dependent_counters()) {
2276 if (!E)
2277 continue;
2278 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2279 "dependent counter must not be an iterator.");
2280 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2281 Address CounterAddr =
2282 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2283 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2284 }
2285 (void)PreCondVars.apply(CGF);
2286 for (const Expr *E : S.dependent_inits()) {
2287 if (!E)
2288 continue;
2289 CGF.EmitIgnoredExpr(E);
2290 }
2291 // Check that the loop is executed at least once.
2292 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2293 PreCondVars.restore(CGF);
2294 }
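// Non-rectangular example that needs the temporary dependent counters above
// (illustrative):
//   for (int i = 0; i < N; ++i)
//     for (int j = i; j < N; ++j)   // lower bound of 'j' depends on 'i'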
2295
2296 void CodeGenFunction::EmitOMPLinearClause(
2297 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2298 if (!HaveInsertPoint())
2299 return;
2300 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2301 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2302 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2303 for (const Expr *C : LoopDirective->counters()) {
2304 SIMDLCVs.insert(
2305 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2306 }
2307 }
2308 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2309 auto CurPrivate = C->privates().begin();
2310 for (const Expr *E : C->varlists()) {
2311 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2312 const auto *PrivateVD =
2313 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2314 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2315 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2316 // Emit private VarDecl with copy init.
2317 EmitVarDecl(*PrivateVD);
2318 return GetAddrOfLocalVar(PrivateVD);
2319 });
2320 assert(IsRegistered && "linear var already registered as private");
2321 // Silence the warning about unused variable.
2322 (void)IsRegistered;
2323 } else {
2324 EmitVarDecl(*PrivateVD);
2325 }
2326 ++CurPrivate;
2327 }
2328 }
2329 }
2330
2331 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2332 const OMPExecutableDirective &D) {
2333 if (!CGF.HaveInsertPoint())
2334 return;
2335 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2336 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2337 /*ignoreResult=*/true);
2338 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2339 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2340 // In the presence of a finite 'safelen', it may be unsafe to mark all
2341 // the memory instructions parallel, because loop-carried
2342 // dependences of 'safelen' iterations are possible.
2343 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2344 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2345 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2346 /*ignoreResult=*/true);
2347 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2348 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2349 // In the presence of a finite 'safelen', it may be unsafe to mark all
2350 // the memory instructions parallel, because loop-carried
2351 // dependences of 'safelen' iterations are possible.
2352 CGF.LoopStack.setParallel(/*Enable=*/false);
2353 }
2354 }
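// Examples (illustrative):
//   #pragma omp simd simdlen(8) -> vectorize width 8, accesses stay parallel
//   #pragma omp simd safelen(4) -> vectorize width 4, parallel disabled,
//                                  since dependences at a distance of 4 or
//                                  more iterations are still possible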
2355
2356 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2357 // Walk the clauses and process simdlen/safelen, order, and inscan reductions.
2358 LoopStack.setParallel(/*Enable=*/true);
2359 LoopStack.setVectorizeEnable();
2360 emitSimdlenSafelenClause(*this, D);
2361 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2362 if (C->getKind() == OMPC_ORDER_concurrent)
2363 LoopStack.setParallel(/*Enable=*/true);
2364 if ((D.getDirectiveKind() == OMPD_simd ||
2365 (getLangOpts().OpenMPSimd &&
2366 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2367 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2368 [](const OMPReductionClause *C) {
2369 return C->getModifier() == OMPC_REDUCTION_inscan;
2370 }))
2371 // Disable parallel access in case of prefix sum.
2372 LoopStack.setParallel(/*Enable=*/false);
2373 }
2374
2375 void CodeGenFunction::EmitOMPSimdFinal(
2376 const OMPLoopDirective &D,
2377 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2378 if (!HaveInsertPoint())
2379 return;
2380 llvm::BasicBlock *DoneBB = nullptr;
2381 auto IC = D.counters().begin();
2382 auto IPC = D.private_counters().begin();
2383 for (const Expr *F : D.finals()) {
2384 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2385 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2386 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2387 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2388 OrigVD->hasGlobalStorage() || CED) {
2389 if (!DoneBB) {
2390 if (llvm::Value *Cond = CondGen(*this)) {
2391 // When the first post-update expression is found, emit the conditional
2392 // block if one was requested.
2393 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2394 DoneBB = createBasicBlock(".omp.final.done");
2395 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2396 EmitBlock(ThenBB);
2397 }
2398 }
2399 Address OrigAddr = Address::invalid();
2400 if (CED) {
2401 OrigAddr =
2402 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2403 } else {
2404 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2405 /*RefersToEnclosingVariableOrCapture=*/false,
2406 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2407 OrigAddr = EmitLValue(&DRE).getAddress(*this);
2408 }
2409 OMPPrivateScope VarScope(*this);
2410 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2411 (void)VarScope.Privatize();
2412 EmitIgnoredExpr(F);
2413 }
2414 ++IC;
2415 ++IPC;
2416 }
2417 if (DoneBB)
2418 EmitBlock(DoneBB, /*IsFinished=*/true);
2419 }
2420
2421 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2422 const OMPLoopDirective &S,
2423 CodeGenFunction::JumpDest LoopExit) {
2424 CGF.EmitOMPLoopBody(S, LoopExit);
2425 CGF.EmitStopPoint(&S);
2426 }
2427
2428 /// Emit a helper variable and return the corresponding lvalue.
2429 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2430 const DeclRefExpr *Helper) {
2431 auto VDecl = cast<VarDecl>(Helper->getDecl());
2432 CGF.EmitVarDecl(*VDecl);
2433 return CGF.EmitLValue(Helper);
2434 }
2435
2436 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2437 const RegionCodeGenTy &SimdInitGen,
2438 const RegionCodeGenTy &BodyCodeGen) {
2439 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2440 PrePostActionTy &) {
2441 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2442 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2443 SimdInitGen(CGF);
2444
2445 BodyCodeGen(CGF);
2446 };
2447 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2448 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2449 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2450
2451 BodyCodeGen(CGF);
2452 };
2453 const Expr *IfCond = nullptr;
2454 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2455 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2456 if (CGF.getLangOpts().OpenMP >= 50 &&
2457 (C->getNameModifier() == OMPD_unknown ||
2458 C->getNameModifier() == OMPD_simd)) {
2459 IfCond = C->getCondition();
2460 break;
2461 }
2462 }
2463 }
2464 if (IfCond) {
2465 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2466 } else {
2467 RegionCodeGenTy ThenRCG(ThenGen);
2468 ThenRCG(CGF);
2469 }
2470 }
2471
2472 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2473 PrePostActionTy &Action) {
2474 Action.Enter(CGF);
2475 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2476 "Expected simd directive");
2477 OMPLoopScope PreInitScope(CGF, S);
2478 // if (PreCond) {
2479 // for (IV in 0..LastIteration) BODY;
2480 // <Final counter/linear vars updates>;
2481 // }
2482 //
2483 if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2484 isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2485 isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2486 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2487 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2488 }
2489
2490 // Emit: if (PreCond) - begin.
2491 // If the condition constant folds and can be elided, avoid emitting the
2492 // whole loop.
2493 bool CondConstant;
2494 llvm::BasicBlock *ContBlock = nullptr;
2495 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2496 if (!CondConstant)
2497 return;
2498 } else {
2499 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2500 ContBlock = CGF.createBasicBlock("simd.if.end");
2501 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2502 CGF.getProfileCount(&S));
2503 CGF.EmitBlock(ThenBlock);
2504 CGF.incrementProfileCounter(&S);
2505 }
2506
2507 // Emit the loop iteration variable.
2508 const Expr *IVExpr = S.getIterationVariable();
2509 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2510 CGF.EmitVarDecl(*IVDecl);
2511 CGF.EmitIgnoredExpr(S.getInit());
2512
2513 // Emit the iterations count variable.
2514 // If it is not a variable, Sema decided to calculate the iteration count on
2515 // each iteration (e.g., it is foldable into a constant).
2516 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2517 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2518 // Emit calculation of the iterations count.
2519 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2520 }
2521
2522 emitAlignedClause(CGF, S);
2523 (void)CGF.EmitOMPLinearClauseInit(S);
2524 {
2525 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2526 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2527 CGF.EmitOMPLinearClause(S, LoopScope);
2528 CGF.EmitOMPPrivateClause(S, LoopScope);
2529 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2530 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2531 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2532 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2533 (void)LoopScope.Privatize();
2534 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2535 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2536
2537 emitCommonSimdLoop(
2538 CGF, S,
2539 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2540 CGF.EmitOMPSimdInit(S);
2541 },
2542 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2543 CGF.EmitOMPInnerLoop(
2544 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2545 [&S](CodeGenFunction &CGF) {
2546 emitOMPLoopBodyWithStopPoint(CGF, S,
2547 CodeGenFunction::JumpDest());
2548 },
2549 [](CodeGenFunction &) {});
2550 });
2551 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2552 // Emit final copy of the lastprivate variables at the end of loops.
2553 if (HasLastprivateClause)
2554 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2555 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2556 emitPostUpdateForReductionClause(CGF, S,
2557 [](CodeGenFunction &) { return nullptr; });
2558 }
2559 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2560 // Emit: if (PreCond) - end.
2561 if (ContBlock) {
2562 CGF.EmitBranch(ContBlock);
2563 CGF.EmitBlock(ContBlock, true);
2564 }
2565 }
2566
2567 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2568 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2569 OMPFirstScanLoop = true;
2570 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2571 emitOMPSimdRegion(CGF, S, Action);
2572 };
2573 {
2574 auto LPCRegion =
2575 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2576 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2577 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2578 }
2579 // Check for outer lastprivate conditional update.
2580 checkForLastprivateConditionalUpdate(*this, S);
2581 }
2582
2583 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2584 // Emit the de-sugared statement.
2585 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2586 EmitStmt(S.getTransformedStmt());
2587 }
2588
2589 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2590 // This function is only called if the unrolled loop is not consumed by any
2591 // other loop-associated construct. Such a loop-associated construct will have
2592 // used the transformed AST.
2593
2594 // Set the unroll metadata for the next emitted loop.
2595 LoopStack.setUnrollState(LoopAttributes::Enable);
2596
2597 if (S.hasClausesOfKind<OMPFullClause>()) {
2598 LoopStack.setUnrollState(LoopAttributes::Full);
2599 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2600 if (Expr *FactorExpr = PartialClause->getFactor()) {
2601 uint64_t Factor =
2602 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2603 assert(Factor >= 1 && "Only positive factors are valid");
2604 LoopStack.setUnrollCount(Factor);
2605 }
2606 }
2607
2608 EmitStmt(S.getAssociatedStmt());
2609 }
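// Examples (illustrative):
//   #pragma omp unroll full       -> LoopAttributes::Full
//   #pragma omp unroll partial(4) -> unroll count set to 4
//   #pragma omp unroll            -> LoopAttributes::Enable only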
2610
2611 void CodeGenFunction::EmitOMPOuterLoop(
2612 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2613 CodeGenFunction::OMPPrivateScope &LoopScope,
2614 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2615 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2616 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2617 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2618
2619 const Expr *IVExpr = S.getIterationVariable();
2620 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2621 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2622
2623 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2624
2625 // Start the loop with a block that tests the condition.
2626 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2627 EmitBlock(CondBlock);
2628 const SourceRange R = S.getSourceRange();
2629 OMPLoopNestStack.clear();
2630 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2631 SourceLocToDebugLoc(R.getEnd()));
2632
2633 llvm::Value *BoolCondVal = nullptr;
2634 if (!DynamicOrOrdered) {
2635 // UB = min(UB, GlobalUB) or
2636 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2637 // 'distribute parallel for')
2638 EmitIgnoredExpr(LoopArgs.EUB);
2639 // IV = LB
2640 EmitIgnoredExpr(LoopArgs.Init);
2641 // IV < UB
2642 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2643 } else {
2644 BoolCondVal =
2645 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2646 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2647 }
2648
2649 // If there are any cleanups between here and the loop-exit scope,
2650 // create a block to stage a loop exit along.
2651 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2652 if (LoopScope.requiresCleanups())
2653 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2654
2655 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2656 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2657 if (ExitBlock != LoopExit.getBlock()) {
2658 EmitBlock(ExitBlock);
2659 EmitBranchThroughCleanup(LoopExit);
2660 }
2661 EmitBlock(LoopBody);
2662
2663 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2664 // LB for loop condition and emitted it above).
2665 if (DynamicOrOrdered)
2666 EmitIgnoredExpr(LoopArgs.Init);
2667
2668 // Create a block for the increment.
2669 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2670 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2671
2672 emitCommonSimdLoop(
2673 *this, S,
2674 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2675 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2676 // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
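
// For illustration only: the overall shape of the code emitted by
// EmitOMPOuterLoop is roughly the following (block names match the
// createBasicBlock() calls above; the exact IR depends on the schedule and
// on whether the loop scope requires cleanups):
//
//   omp.dispatch.cond:
//     ; static: UB = min(UB, GlobalUB); IV = LB; cond = IV < UB
//     ; dynamic/ordered: cond = __kmpc_dispatch_next(..., &LB, &UB, &ST)
//     br cond ? omp.dispatch.body : omp.dispatch.end
//   omp.dispatch.body:
//     ; inner loop over [LB, UB] (EmitOMPInnerLoop)
//   omp.dispatch.inc:
//     ; static only: LB += ST; UB += ST
//     br omp.dispatch.cond
//   omp.dispatch.end:
//     ; static only: tell the runtime the static loop is finished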

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
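  // For illustration, the following user loops would reach the two code
  // paths sketched above (assuming this exact mapping; the runtime is free
  // to pick chunk sizes for guided/auto/runtime schedules):
  //
  //   #pragma omp for schedule(dynamic, 4) // DynamicOrOrdered: dispatch init
  //   for (int i = 0; i < n; ++i) ...
  //
  //   #pragma omp for schedule(static, 4)  // static chunked: static init
  //   for (int i = 0; i < n; ++i) ...
  //
  // An unchunked schedule(static) loop never gets here; the assert above
  // rejects it because it needs no outer loop.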

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for', the increment expression of
  // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for'), we need to use the 'distribute' chunk lower and upper
  // bounds rather than the whole loop iteration space. These are parameters
  // to the outlined function for 'parallel' and we copy the bounds of the
  // previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
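
// For illustration (the numbers are made up, not computed in this file):
// with '#pragma omp distribute parallel for' over 1024 iterations and 4
// teams, the 'distribute' schedule might hand team 2 the chunk [512, 767].
// The helper above then initializes the inner 'for' worksharing loop of
// that team with LB = 512 and UB = 767 instead of the full range [0, 1023].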

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference between the LB and UB
/// types used by the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
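
// A rough sketch of how the helpers above cooperate for a combined
// '#pragma omp distribute parallel for' (illustrative, not literal output):
//
//   distribute loop over team chunks          (EmitOMPDistributeLoop)
//     -> fork a parallel region, passing the chunk bounds as parameters
//        (emitCommonOMPParallelDirective + ...InnerBoundParams above)
//       -> 'for' worksharing loop over [PrevLB, PrevUB]
//          (EmitOMPWorksharingLoop via CGInlinedWorksharingLoop)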

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each use (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
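      // For example (an illustration of the computation above): plain
      // schedule(static) and any loop with an 'ordered' clause are treated
      // as monotonic, while schedule(nonmonotonic: dynamic) is not.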
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in the case of static and dynamic (dispatch) schedules
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided), we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
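
// To contrast the two helpers above on a loop with, say, 100 iterations (an
// illustrative sketch, not emitted code): emitForLoopBounds hands the
// static-init runtime call the addresses of the lower/upper-bound helper
// variables so the runtime can write each thread's chunk into them, while
// emitDispatchForLoopBounds passes the constants 0 and LastIteration (99
// here) because dispatch init only needs the overall iteration space.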

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit a buffer for each reduction variable.
    // ReductionCodeGen is required to correctly emit the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}

/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k != ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
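
// A worked example of the prefix-reduction loop above (illustrative only):
// for num_iters = 8 and '+' as the reduction op, ceil(log2(8)) = 3 passes
// of the outer loop run:
//   k = 0, pow2k = 1: buffer[i] += buffer[i-1] for i = 7..1
//   k = 1, pow2k = 2: buffer[i] += buffer[i-2] for i = 7..2
//   k = 2, pow2k = 4: buffer[i] += buffer[i-4] for i = 7..4
// Afterwards buffer[i] holds the inclusive prefix sum of elements 0..i.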

static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    const auto &&SecondGen = [&S, HasCancel,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
  } else {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}

static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
  if (S.hasCancel())
    return false;
  for (OMPClause *C : S.clauses())
    if (!isa<OMPNowaitClause>(C))
      return false;

  return true;
}
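
// For example (illustrative): a plain '#pragma omp for nowait' satisfies
// the check above and may be lowered through the OpenMPIRBuilder, while
// '#pragma omp for schedule(dynamic)' or a loop containing
// '#pragma omp cancel for' falls back to the classic clang codegen path.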

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
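
// For illustration (a sketch of the lowering above, not literal output): a
// directive such as
//
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }
//
// is emitted as a static worksharing loop over the iteration space [0, 1],
// whose body switches on the iteration variable: case 0 runs foo(), case 1
// runs bar().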

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP,
                                         llvm::BasicBlock &FiniBB) {
          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
                                                         FiniBB);
          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
                                                 FiniBB);
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP,
                                            llvm::BasicBlock &FiniBB) {
        OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
        OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
                                               FiniBB);
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP,
                                                   llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMaster(*this, S);
}

static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  Expr *Filter = nullptr;
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
    Filter = FilterClause->getThreadID();
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
                                              Filter);
}

void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
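    // With no 'filter' clause the filter value defaults to thread id 0, so
    // the 'masked' region degenerates to the 'master' semantics.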
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMasked(*this, S);
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP,
                                                    llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
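    // For 'inscan' reductions the temporary copy arrays are sized by the
    // number of loop iterations, so their declarations have to be emitted
    // before entering the parallel region.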
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     })) {
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
        CGCapturedStmtInfo CGSI(CR_OpenMP);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
        OMPLoopScope LoopScope(CGF, S);
        return CGF.EmitScalarExpr(S.getNumIterations());
      };
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    }
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize the threads and avoid data
      // races when propagating the master thread's values of the
      // threadprivate variables to the local instances of those variables in
      // all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Only local variables need to be privatized; static locals can be
    // processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of local variables that have to be privatized.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
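  // The captured task function carries a fixed set of implicit parameters:
  // 0: thread id, 1: part id, 2: the privates block, 3: the copy function,
  // 4: the task descriptor (see the PrivatesParam/CopyFnParam indices below).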
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has a 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
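    // Maps each untied-task local variable to a pair of addresses: the
    // pointer slot filled in by the task copy function and, for allocatable
    // declarations, the actual storage that pointer refers to.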
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If the key already exists, update the value in place.
        if (!Result.second)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
      // Adjust the mapping for internal locals by mapping the actual memory
      // instead of a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        if (isAllocatableDecl(Pair.first)) {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first,
                                     [Replacement]() { return Replacement; });
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
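      // The pointer to the runtime's task reduction data is loaded from
      // implicit parameter 9 of the captured task function.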
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicitly firstprivate
        // and has already been privatized while processing the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}

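/// Create an implicit firstprivate variable of the given type and register it
/// in \p Data, together with the reference expressions used as the original
/// variable, the private copy, and the initializer.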
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.getPointer())) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD,
                             [&InputInfo]() { return InputInfo.MappersArray; });
    }
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized.
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
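  // Emit if(1) when a 'nowait' clause is present, so the generated task may be
  // deferred, and if(0) otherwise, which forces the task to be executed
  // immediately as an undeferred task (regular 'if' clause semantics).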
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit a tied or an untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
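  // A 'flush' with a variable list (i.e. with an OMPFlushClause) is lowered
  // with no specific atomic ordering, while a stand-alone 'flush' uses
  // acquire-release ordering.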
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
                                ? llvm::AtomicOrdering::NotAtomic
                                : llvm::AtomicOrdering::AcquireRelease;
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc(), AO);
}

void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
  LValue DOLVal = EmitLValue(DO->getDepobj());
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
                                           DC->getModifier());
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
        *this, Dependencies, DC->getBeginLoc());
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
    return;
  }
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
    return;
  }
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
    CGM.getOpenMPRuntime().emitUpdateClause(
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
    return;
  }
}

void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
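  // With an 'inclusive' clause the value produced by iteration i is already
  // visible in the scan phase of iteration i; with 'exclusive' only the
  // iterations 0..i-1 contribute to the value seen in iteration i.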
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    // }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                      SrcLVal.getAddress(*this),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                    SrcLVal.getAddress(*this),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.

      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /*Chunked=*/Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /*Ordered=*/false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunked schedules generate:
        //
        //   while (IV <= GlobalUB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      LB += ST;
        //      UB += ST;
        //      UB = min(UB, GlobalUB);
        //      IV = LB;
        //   }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
5292
5293 void CodeGenFunction::EmitOMPDistributeDirective(
5294 const OMPDistributeDirective &S) {
5295 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5296 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5297 };
5298 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5299 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5300 }
5301
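// Outline the body of an 'ordered simd' region into its own function so the
// ordered construct can invoke it directly. The result is marked
// non-recursive and, when optimizing, always-inlined.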
5302 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5303 const CapturedStmt *S,
5304 SourceLocation Loc) {
5305 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5306 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5307 CGF.CapturedStmtInfo = &CapStmtInfo;
5308 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5309 Fn->setDoesNotRecurse();
5310 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
5311 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
5312 return Fn;
5313 }
5314
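// Lower '#pragma omp ordered'. The 'depend' form, e.g.
//   #pragma omp ordered depend(sink: i-1)
// has no associated statement and lowers to doacross runtime calls. With a
// 'simd' clause the associated statement is outlined and called directly;
// otherwise it is emitted inside a runtime-managed ordered region.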
5315 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5316 if (S.hasClausesOfKind<OMPDependClause>()) {
5317 assert(!S.hasAssociatedStmt() &&
5318            "An 'ordered' construct with a 'depend' clause has no associated statement.");
5319 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5320 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5321 return;
5322 }
5323 const auto *C = S.getSingleClause<OMPSIMDClause>();
5324 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5325 PrePostActionTy &Action) {
5326 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5327 if (C) {
5328 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5329 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5330 llvm::Function *OutlinedFn =
5331 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5332 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
5333 OutlinedFn, CapturedVars);
5334 } else {
5335 Action.Enter(CGF);
5336 CGF.EmitStmt(CS->getCapturedStmt());
5337 }
5338 };
5339 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5340 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
5341 }
5342
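// Convert a scalar or complex RValue of type 'SrcType' into a scalar value of
// 'DestType'; complex values go through the complex-to-scalar conversion.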
5343 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5344 QualType SrcType, QualType DestType,
5345 SourceLocation Loc) {
5346 assert(CGF.hasScalarEvaluationKind(DestType) &&
5347 "DestType must have scalar evaluation kind.");
5348 assert(!Val.isAggregate() && "Must be a scalar or complex.");
5349 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5350 DestType, Loc)
5351 : CGF.EmitComplexToScalarConversion(
5352 Val.getComplexVal(), SrcType, DestType, Loc);
5353 }
5354
5355 static CodeGenFunction::ComplexPairTy
5356 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5357 QualType DestType, SourceLocation Loc) {
5358 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5359 "DestType must have complex evaluation kind.");
5360 CodeGenFunction::ComplexPairTy ComplexVal;
5361 if (Val.isScalar()) {
5362 // Convert the input element to the element type of the complex.
5363 QualType DestElementType =
5364 DestType->castAs<ComplexType>()->getElementType();
5365 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5366 Val.getScalarVal(), SrcType, DestElementType, Loc);
5367 ComplexVal = CodeGenFunction::ComplexPairTy(
5368 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5369 } else {
5370 assert(Val.isComplex() && "Must be a scalar or complex.");
5371 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
5372 QualType DestElementType =
5373 DestType->castAs<ComplexType>()->getElementType();
5374 ComplexVal.first = CGF.EmitScalarConversion(
5375 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
5376 ComplexVal.second = CGF.EmitScalarConversion(
5377 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
5378 }
5379 return ComplexVal;
5380 }
5381
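// Store 'RVal' into 'LVal': a plain store for global register lvalues,
// otherwise an atomic store with ordering 'AO'.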
5382 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5383 LValue LVal, RValue RVal) {
5384 if (LVal.isGlobalReg())
5385 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
5386 else
5387 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
5388 }
5389
5390 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
5391 llvm::AtomicOrdering AO, LValue LVal,
5392 SourceLocation Loc) {
5393 if (LVal.isGlobalReg())
5394 return CGF.EmitLoadOfLValue(LVal, Loc);
5395 return CGF.EmitAtomicLoad(
5396 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
5397 LVal.isVolatile());
5398 }
5399
5400 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
5401 QualType RValTy, SourceLocation Loc) {
5402 switch (getEvaluationKind(LVal.getType())) {
5403 case TEK_Scalar:
5404 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
5405 *this, RVal, RValTy, LVal.getType(), Loc)),
5406 LVal);
5407 break;
5408 case TEK_Complex:
5409 EmitStoreOfComplex(
5410 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
5411 /*isInit=*/false);
5412 break;
5413 case TEK_Aggregate:
5414 llvm_unreachable("Must be a scalar or complex.");
5415 }
5416 }
5417
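// Lower the 'read' form of '#pragma omp atomic', e.g.
//   #pragma omp atomic read acquire
//   v = x;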
5418 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5419 const Expr *X, const Expr *V,
5420 SourceLocation Loc) {
5421 // v = x;
5422 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
5423 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
5424 LValue XLValue = CGF.EmitLValue(X);
5425 LValue VLValue = CGF.EmitLValue(V);
5426 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
5427 // OpenMP, 2.17.7, atomic Construct
5428 // If the read or capture clause is specified and the acquire, acq_rel, or
5429 // seq_cst clause is specified then the strong flush on exit from the atomic
5430 // operation is also an acquire flush.
5431 switch (AO) {
5432 case llvm::AtomicOrdering::Acquire:
5433 case llvm::AtomicOrdering::AcquireRelease:
5434 case llvm::AtomicOrdering::SequentiallyConsistent:
5435 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5436 llvm::AtomicOrdering::Acquire);
5437 break;
5438 case llvm::AtomicOrdering::Monotonic:
5439 case llvm::AtomicOrdering::Release:
5440 break;
5441 case llvm::AtomicOrdering::NotAtomic:
5442 case llvm::AtomicOrdering::Unordered:
5443 llvm_unreachable("Unexpected ordering.");
5444 }
5445 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
5446 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
5447 }
5448
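// Lower the 'write' form of '#pragma omp atomic', e.g.
//   #pragma omp atomic write release
//   x = expr;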
5449 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
5450 llvm::AtomicOrdering AO, const Expr *X,
5451 const Expr *E, SourceLocation Loc) {
5452 // x = expr;
5453 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
5454 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
5455 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5456 // OpenMP, 2.17.7, atomic Construct
5457 // If the write, update, or capture clause is specified and the release,
5458 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5459 // the atomic operation is also a release flush.
5460 switch (AO) {
5461 case llvm::AtomicOrdering::Release:
5462 case llvm::AtomicOrdering::AcquireRelease:
5463 case llvm::AtomicOrdering::SequentiallyConsistent:
5464 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5465 llvm::AtomicOrdering::Release);
5466 break;
5467 case llvm::AtomicOrdering::Acquire:
5468 case llvm::AtomicOrdering::Monotonic:
5469 break;
5470 case llvm::AtomicOrdering::NotAtomic:
5471 case llvm::AtomicOrdering::Unordered:
5472 llvm_unreachable("Unexpected ordering.");
5473 }
5474 }
5475
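// Try to lower an atomic update to a single 'atomicrmw' instruction. On
// success, returns {true, old value of 'x'}; otherwise returns
// {false, nullptr} and the caller falls back to a compare-and-swap sequence.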
5476 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
5477 RValue Update,
5478 BinaryOperatorKind BO,
5479 llvm::AtomicOrdering AO,
5480 bool IsXLHSInRHSPart) {
5481 ASTContext &Context = CGF.getContext();
5482   // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
5483   // for the 'x' expression is simple, and atomics are supported for the given
5484   // type on the target platform.
5485 if (BO == BO_Comma || !Update.isScalar() ||
5486 !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
5487 (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
5488 (Update.getScalarVal()->getType() !=
5489 X.getAddress(CGF).getElementType())) ||
5490 !X.getAddress(CGF).getElementType()->isIntegerTy() ||
5491 !Context.getTargetInfo().hasBuiltinAtomic(
5492 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
5493 return std::make_pair(false, RValue::get(nullptr));
5494
5495 llvm::AtomicRMWInst::BinOp RMWOp;
5496 switch (BO) {
5497 case BO_Add:
5498 RMWOp = llvm::AtomicRMWInst::Add;
5499 break;
5500 case BO_Sub:
5501 if (!IsXLHSInRHSPart)
5502 return std::make_pair(false, RValue::get(nullptr));
5503 RMWOp = llvm::AtomicRMWInst::Sub;
5504 break;
5505 case BO_And:
5506 RMWOp = llvm::AtomicRMWInst::And;
5507 break;
5508 case BO_Or:
5509 RMWOp = llvm::AtomicRMWInst::Or;
5510 break;
5511 case BO_Xor:
5512 RMWOp = llvm::AtomicRMWInst::Xor;
5513 break;
5514 case BO_LT:
5515 RMWOp = X.getType()->hasSignedIntegerRepresentation()
5516 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
5517 : llvm::AtomicRMWInst::Max)
5518 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
5519 : llvm::AtomicRMWInst::UMax);
5520 break;
5521 case BO_GT:
5522 RMWOp = X.getType()->hasSignedIntegerRepresentation()
5523 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
5524 : llvm::AtomicRMWInst::Min)
5525 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
5526 : llvm::AtomicRMWInst::UMin);
5527 break;
5528 case BO_Assign:
5529 RMWOp = llvm::AtomicRMWInst::Xchg;
5530 break;
5531 case BO_Mul:
5532 case BO_Div:
5533 case BO_Rem:
5534 case BO_Shl:
5535 case BO_Shr:
5536 case BO_LAnd:
5537 case BO_LOr:
5538 return std::make_pair(false, RValue::get(nullptr));
5539 case BO_PtrMemD:
5540 case BO_PtrMemI:
5541 case BO_LE:
5542 case BO_GE:
5543 case BO_EQ:
5544 case BO_NE:
5545 case BO_Cmp:
5546 case BO_AddAssign:
5547 case BO_SubAssign:
5548 case BO_AndAssign:
5549 case BO_OrAssign:
5550 case BO_XorAssign:
5551 case BO_MulAssign:
5552 case BO_DivAssign:
5553 case BO_RemAssign:
5554 case BO_ShlAssign:
5555 case BO_ShrAssign:
5556 case BO_Comma:
5557 llvm_unreachable("Unsupported atomic update operation");
5558 }
5559 llvm::Value *UpdateVal = Update.getScalarVal();
5560 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
5561 UpdateVal = CGF.Builder.CreateIntCast(
5562 IC, X.getAddress(CGF).getElementType(),
5563 X.getType()->hasSignedIntegerRepresentation());
5564 }
5565 llvm::Value *Res =
5566 CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
5567 return std::make_pair(true, RValue::get(Res));
5568 }
5569
5570 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
5571 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
5572 llvm::AtomicOrdering AO, SourceLocation Loc,
5573 const llvm::function_ref<RValue(RValue)> CommonGen) {
5574 // Update expressions are allowed to have the following forms:
5575   // x binop= expr; -> xrval binop expr;
5576   // x++, ++x -> xrval + 1;
5577   // x--, --x -> xrval - 1;
5578   // x = x binop expr; -> xrval binop expr;
5579   // x = expr binop x; -> expr binop xrval;
5580 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
5581 if (!Res.first) {
5582 if (X.isGlobalReg()) {
5583 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
5584 // 'xrval'.
5585 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
5586 } else {
5587 // Perform compare-and-swap procedure.
5588 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
5589 }
5590 }
5591 return Res;
5592 }
5593
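// Lower the 'update' form of '#pragma omp atomic', e.g.
//   #pragma omp atomic update
//   x += expr;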
5594 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
5595 llvm::AtomicOrdering AO, const Expr *X,
5596 const Expr *E, const Expr *UE,
5597 bool IsXLHSInRHSPart, SourceLocation Loc) {
5598 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5599 "Update expr in 'atomic update' must be a binary operator.");
5600 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5601 // Update expressions are allowed to have the following forms:
5602   // x binop= expr; -> xrval binop expr;
5603   // x++, ++x -> xrval + 1;
5604   // x--, --x -> xrval - 1;
5605   // x = x binop expr; -> xrval binop expr;
5606   // x = expr binop x; -> expr binop xrval;
5607 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
5608 LValue XLValue = CGF.EmitLValue(X);
5609 RValue ExprRValue = CGF.EmitAnyExpr(E);
5610 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5611 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5612 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5613 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5614 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
5615 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5616 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5617 return CGF.EmitAnyExpr(UE);
5618 };
5619 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5620 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5621 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5622 // OpenMP, 2.17.7, atomic Construct
5623 // If the write, update, or capture clause is specified and the release,
5624 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5625 // the atomic operation is also a release flush.
5626 switch (AO) {
5627 case llvm::AtomicOrdering::Release:
5628 case llvm::AtomicOrdering::AcquireRelease:
5629 case llvm::AtomicOrdering::SequentiallyConsistent:
5630 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5631 llvm::AtomicOrdering::Release);
5632 break;
5633 case llvm::AtomicOrdering::Acquire:
5634 case llvm::AtomicOrdering::Monotonic:
5635 break;
5636 case llvm::AtomicOrdering::NotAtomic:
5637 case llvm::AtomicOrdering::Unordered:
5638 llvm_unreachable("Unexpected ordering.");
5639 }
5640 }
5641
5642 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
5643 QualType SourceType, QualType ResType,
5644 SourceLocation Loc) {
5645 switch (CGF.getEvaluationKind(ResType)) {
5646 case TEK_Scalar:
5647 return RValue::get(
5648 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
5649 case TEK_Complex: {
5650 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
5651 return RValue::getComplex(Res.first, Res.second);
5652 }
5653 case TEK_Aggregate:
5654 break;
5655 }
5656 llvm_unreachable("Must be a scalar or complex.");
5657 }
5658
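// Lower the 'capture' form of '#pragma omp atomic'. For example, with
//   { v = x; x += expr; }   // postfix: 'v' receives the old value of 'x'
//   { x += expr; v = x; }   // prefix: 'v' receives the updated value
// the value stored to 'v' depends on IsPostfixUpdate.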
5659 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
5660 llvm::AtomicOrdering AO,
5661 bool IsPostfixUpdate, const Expr *V,
5662 const Expr *X, const Expr *E,
5663 const Expr *UE, bool IsXLHSInRHSPart,
5664 SourceLocation Loc) {
5665 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
5666 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
5667 RValue NewVVal;
5668 LValue VLValue = CGF.EmitLValue(V);
5669 LValue XLValue = CGF.EmitLValue(X);
5670 RValue ExprRValue = CGF.EmitAnyExpr(E);
5671 QualType NewVValType;
5672 if (UE) {
5673 // 'x' is updated with some additional value.
5674 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5675 "Update expr in 'atomic capture' must be a binary operator.");
5676 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5677 // Update expressions are allowed to have the following forms:
5678     // x binop= expr; -> xrval binop expr;
5679     // x++, ++x -> xrval + 1;
5680     // x--, --x -> xrval - 1;
5681     // x = x binop expr; -> xrval binop expr;
5682     // x = expr binop x; -> expr binop xrval;
5683 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5684 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5685 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5686 NewVValType = XRValExpr->getType();
5687 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5688 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
5689 IsPostfixUpdate](RValue XRValue) {
5690 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5691 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5692 RValue Res = CGF.EmitAnyExpr(UE);
5693 NewVVal = IsPostfixUpdate ? XRValue : Res;
5694 return Res;
5695 };
5696 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5697 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5698 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5699 if (Res.first) {
5700 // 'atomicrmw' instruction was generated.
5701 if (IsPostfixUpdate) {
5702 // Use old value from 'atomicrmw'.
5703 NewVVal = Res.second;
5704 } else {
5705 // 'atomicrmw' does not provide new value, so evaluate it using old
5706 // value of 'x'.
5707 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5708 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
5709 NewVVal = CGF.EmitAnyExpr(UE);
5710 }
5711 }
5712 } else {
5713 // 'x' is simply rewritten with some 'expr'.
5714 NewVValType = X->getType().getNonReferenceType();
5715 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
5716 X->getType().getNonReferenceType(), Loc);
5717 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
5718 NewVVal = XRValue;
5719 return ExprRValue;
5720 };
5721 // Try to perform atomicrmw xchg, otherwise simple exchange.
5722 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5723 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
5724 Loc, Gen);
5725 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5726 if (Res.first) {
5727 // 'atomicrmw' instruction was generated.
5728 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
5729 }
5730 }
5731 // Emit post-update store to 'v' of old/new 'x' value.
5732 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
5733 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
5734 // OpenMP 5.1 removes the required flush for capture clause.
5735 if (CGF.CGM.getLangOpts().OpenMP < 51) {
5736 // OpenMP, 2.17.7, atomic Construct
5737 // If the write, update, or capture clause is specified and the release,
5738 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5739 // the atomic operation is also a release flush.
5740 // If the read or capture clause is specified and the acquire, acq_rel, or
5741 // seq_cst clause is specified then the strong flush on exit from the atomic
5742 // operation is also an acquire flush.
5743 switch (AO) {
5744 case llvm::AtomicOrdering::Release:
5745 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5746 llvm::AtomicOrdering::Release);
5747 break;
5748 case llvm::AtomicOrdering::Acquire:
5749 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5750 llvm::AtomicOrdering::Acquire);
5751 break;
5752 case llvm::AtomicOrdering::AcquireRelease:
5753 case llvm::AtomicOrdering::SequentiallyConsistent:
5754 CGF.CGM.getOpenMPRuntime().emitFlush(
5755 CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease);
5756 break;
5757 case llvm::AtomicOrdering::Monotonic:
5758 break;
5759 case llvm::AtomicOrdering::NotAtomic:
5760 case llvm::AtomicOrdering::Unordered:
5761 llvm_unreachable("Unexpected ordering.");
5762 }
5763 }
5764 }
5765
5766 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
5767 llvm::AtomicOrdering AO, bool IsPostfixUpdate,
5768 const Expr *X, const Expr *V, const Expr *E,
5769 const Expr *UE, bool IsXLHSInRHSPart,
5770 SourceLocation Loc) {
5771 switch (Kind) {
5772 case OMPC_read:
5773 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
5774 break;
5775 case OMPC_write:
5776 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
5777 break;
5778 case OMPC_unknown:
5779 case OMPC_update:
5780 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
5781 break;
5782 case OMPC_capture:
5783 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
5784 IsXLHSInRHSPart, Loc);
5785 break;
5786 case OMPC_if:
5787 case OMPC_final:
5788 case OMPC_num_threads:
5789 case OMPC_private:
5790 case OMPC_firstprivate:
5791 case OMPC_lastprivate:
5792 case OMPC_reduction:
5793 case OMPC_task_reduction:
5794 case OMPC_in_reduction:
5795 case OMPC_safelen:
5796 case OMPC_simdlen:
5797 case OMPC_sizes:
5798 case OMPC_full:
5799 case OMPC_partial:
5800 case OMPC_allocator:
5801 case OMPC_allocate:
5802 case OMPC_collapse:
5803 case OMPC_default:
5804 case OMPC_seq_cst:
5805 case OMPC_acq_rel:
5806 case OMPC_acquire:
5807 case OMPC_release:
5808 case OMPC_relaxed:
5809 case OMPC_shared:
5810 case OMPC_linear:
5811 case OMPC_aligned:
5812 case OMPC_copyin:
5813 case OMPC_copyprivate:
5814 case OMPC_flush:
5815 case OMPC_depobj:
5816 case OMPC_proc_bind:
5817 case OMPC_schedule:
5818 case OMPC_ordered:
5819 case OMPC_nowait:
5820 case OMPC_untied:
5821 case OMPC_threadprivate:
5822 case OMPC_depend:
5823 case OMPC_mergeable:
5824 case OMPC_device:
5825 case OMPC_threads:
5826 case OMPC_simd:
5827 case OMPC_map:
5828 case OMPC_num_teams:
5829 case OMPC_thread_limit:
5830 case OMPC_priority:
5831 case OMPC_grainsize:
5832 case OMPC_nogroup:
5833 case OMPC_num_tasks:
5834 case OMPC_hint:
5835 case OMPC_dist_schedule:
5836 case OMPC_defaultmap:
5837 case OMPC_uniform:
5838 case OMPC_to:
5839 case OMPC_from:
5840 case OMPC_use_device_ptr:
5841 case OMPC_use_device_addr:
5842 case OMPC_is_device_ptr:
5843 case OMPC_unified_address:
5844 case OMPC_unified_shared_memory:
5845 case OMPC_reverse_offload:
5846 case OMPC_dynamic_allocators:
5847 case OMPC_atomic_default_mem_order:
5848 case OMPC_device_type:
5849 case OMPC_match:
5850 case OMPC_nontemporal:
5851 case OMPC_order:
5852 case OMPC_destroy:
5853 case OMPC_detach:
5854 case OMPC_inclusive:
5855 case OMPC_exclusive:
5856 case OMPC_uses_allocators:
5857 case OMPC_affinity:
5858 case OMPC_init:
5859 case OMPC_inbranch:
5860 case OMPC_notinbranch:
5861 case OMPC_link:
5862 case OMPC_use:
5863 case OMPC_novariants:
5864 case OMPC_nocontext:
5865 case OMPC_filter:
5866 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
5867 }
5868 }
5869
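// Entry point for '#pragma omp atomic'. The memory ordering is taken from an
// explicit seq_cst/acq_rel/acquire/release/relaxed clause when present;
// otherwise it falls back to the runtime's default ordering (from a
// 'requires atomic_default_mem_order' directive, if one was seen), constrained
// per the spec for the read/write/update forms when that default is acq_rel.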
5870 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
5871 llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
5872 bool MemOrderingSpecified = false;
5873 if (S.getSingleClause<OMPSeqCstClause>()) {
5874 AO = llvm::AtomicOrdering::SequentiallyConsistent;
5875 MemOrderingSpecified = true;
5876 } else if (S.getSingleClause<OMPAcqRelClause>()) {
5877 AO = llvm::AtomicOrdering::AcquireRelease;
5878 MemOrderingSpecified = true;
5879 } else if (S.getSingleClause<OMPAcquireClause>()) {
5880 AO = llvm::AtomicOrdering::Acquire;
5881 MemOrderingSpecified = true;
5882 } else if (S.getSingleClause<OMPReleaseClause>()) {
5883 AO = llvm::AtomicOrdering::Release;
5884 MemOrderingSpecified = true;
5885 } else if (S.getSingleClause<OMPRelaxedClause>()) {
5886 AO = llvm::AtomicOrdering::Monotonic;
5887 MemOrderingSpecified = true;
5888 }
5889 OpenMPClauseKind Kind = OMPC_unknown;
5890 for (const OMPClause *C : S.clauses()) {
5891     // Find the first clause that is not a memory-order clause
5892     // (seq_cst|acq_rel|acquire|release|relaxed) or a 'hint' clause.
5893 if (C->getClauseKind() != OMPC_seq_cst &&
5894 C->getClauseKind() != OMPC_acq_rel &&
5895 C->getClauseKind() != OMPC_acquire &&
5896 C->getClauseKind() != OMPC_release &&
5897 C->getClauseKind() != OMPC_relaxed && C->getClauseKind() != OMPC_hint) {
5898 Kind = C->getClauseKind();
5899 break;
5900 }
5901 }
5902 if (!MemOrderingSpecified) {
5903 llvm::AtomicOrdering DefaultOrder =
5904 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
5905 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
5906 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
5907 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
5908 Kind == OMPC_capture)) {
5909 AO = DefaultOrder;
5910 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
5911 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
5912 AO = llvm::AtomicOrdering::Release;
5913 } else if (Kind == OMPC_read) {
5915 AO = llvm::AtomicOrdering::Acquire;
5916 }
5917 }
5918 }
5919
5920 LexicalScope Scope(*this, S.getSourceRange());
5921 EmitStopPoint(S.getAssociatedStmt());
5922 emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
5923 S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
5924 S.getBeginLoc());
5925 }
5926
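// Common lowering for all 'target' directives. On the device the construct is
// emitted as inlined code; on the host the region is outlined into an offload
// kernel and launched through the runtime, honoring 'if' and 'device' clauses
// and falling back to host execution when the region is not an offload entry
// point.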
5927 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
5928 const OMPExecutableDirective &S,
5929 const RegionCodeGenTy &CodeGen) {
5930 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
5931 CodeGenModule &CGM = CGF.CGM;
5932
5933 // On device emit this construct as inlined code.
5934 if (CGM.getLangOpts().OpenMPIsDevice) {
5935 OMPLexicalScope Scope(CGF, S, OMPD_target);
5936 CGM.getOpenMPRuntime().emitInlinedDirective(
5937 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5938 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5939 });
5940 return;
5941 }
5942
5943 auto LPCRegion =
5944 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
5945 llvm::Function *Fn = nullptr;
5946 llvm::Constant *FnID = nullptr;
5947
5948 const Expr *IfCond = nullptr;
5949   // Check for the (at most one) 'if' clause associated with the target region.
5950 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5951 if (C->getNameModifier() == OMPD_unknown ||
5952 C->getNameModifier() == OMPD_target) {
5953 IfCond = C->getCondition();
5954 break;
5955 }
5956 }
5957
5958 // Check if we have any device clause associated with the directive.
5959 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
5960 nullptr, OMPC_DEVICE_unknown);
5961 if (auto *C = S.getSingleClause<OMPDeviceClause>())
5962 Device.setPointerAndInt(C->getDevice(), C->getModifier());
5963
5964 // Check if we have an if clause whose conditional always evaluates to false
5965 // or if we do not have any targets specified. If so the target region is not
5966 // an offload entry point.
5967 bool IsOffloadEntry = true;
5968 if (IfCond) {
5969 bool Val;
5970 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
5971 IsOffloadEntry = false;
5972 }
5973 if (CGM.getLangOpts().OMPTargetTriples.empty())
5974 IsOffloadEntry = false;
5975
5976 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
5977 StringRef ParentName;
5978 // In case we have Ctors/Dtors we use the complete type variant to produce
5979 // the mangling of the device outlined kernel.
5980 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
5981 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
5982 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
5983 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
5984 else
5985 ParentName =
5986 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
5987
5988 // Emit target region as a standalone region.
5989 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
5990 IsOffloadEntry, CodeGen);
5991 OMPLexicalScope Scope(CGF, S, OMPD_task);
5992 auto &&SizeEmitter =
5993 [IsOffloadEntry](CodeGenFunction &CGF,
5994 const OMPLoopDirective &D) -> llvm::Value * {
5995 if (IsOffloadEntry) {
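      // Constructing a temporary OMPLoopScope emits the loop's pre-init
      // statements so that the iteration-count expression below can refer to
      // the values they set up.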
5996 OMPLoopScope(CGF, D);
5997 // Emit calculation of the iterations count.
5998 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
5999 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
6000 /*isSigned=*/false);
6001 return NumIterations;
6002 }
6003 return nullptr;
6004 };
6005 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
6006 SizeEmitter);
6007 }
6008
6009 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6010 PrePostActionTy &Action) {
6011 Action.Enter(CGF);
6012 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6013 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6014 CGF.EmitOMPPrivateClause(S, PrivateScope);
6015 (void)PrivateScope.Privatize();
6016 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6017 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6018
6019 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6020 CGF.EnsureInsertPoint();
6021 }
6022
6023 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6024 StringRef ParentName,
6025 const OMPTargetDirective &S) {
6026 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6027 emitTargetRegion(CGF, S, Action);
6028 };
6029 llvm::Function *Fn;
6030 llvm::Constant *Addr;
6031 // Emit target region as a standalone region.
6032 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6033 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6034 assert(Fn && Addr && "Target device function emission failed.");
6035 }
6036
6037 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6038 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6039 emitTargetRegion(CGF, S, Action);
6040 };
6041 emitCommonOMPTargetDirective(*this, S, CodeGen);
6042 }
6043
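// Common lowering for 'teams' directives: outline the teams region, emit
// num_teams/thread_limit bounds if present, and call the runtime with the
// captured variables.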
6044 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6045 const OMPExecutableDirective &S,
6046 OpenMPDirectiveKind InnermostKind,
6047 const RegionCodeGenTy &CodeGen) {
6048 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6049 llvm::Function *OutlinedFn =
6050 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6051 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
6052
6053 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6054 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6055 if (NT || TL) {
6056 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6057 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6058
6059 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6060 S.getBeginLoc());
6061 }
6062
6063 OMPTeamsScope Scope(CGF, S);
6064 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6065 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6066 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6067 CapturedVars);
6068 }
6069
6070 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6071 // Emit teams region as a standalone region.
6072 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6073 Action.Enter(CGF);
6074 OMPPrivateScope PrivateScope(CGF);
6075 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6076 CGF.EmitOMPPrivateClause(S, PrivateScope);
6077 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6078 (void)PrivateScope.Privatize();
6079 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6080 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6081 };
6082 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6083 emitPostUpdateForReductionClause(*this, S,
6084 [](CodeGenFunction &) { return nullptr; });
6085 }
6086
6087 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6088 const OMPTargetTeamsDirective &S) {
6089 auto *CS = S.getCapturedStmt(OMPD_teams);
6090 Action.Enter(CGF);
6091 // Emit teams region as a standalone region.
6092 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6093 Action.Enter(CGF);
6094 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6095 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6096 CGF.EmitOMPPrivateClause(S, PrivateScope);
6097 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6098 (void)PrivateScope.Privatize();
6099 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6100 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6101 CGF.EmitStmt(CS->getCapturedStmt());
6102 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6103 };
6104 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6105 emitPostUpdateForReductionClause(CGF, S,
6106 [](CodeGenFunction &) { return nullptr; });
6107 }
6108
6109 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6110 CodeGenModule &CGM, StringRef ParentName,
6111 const OMPTargetTeamsDirective &S) {
6112 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6113 emitTargetTeamsRegion(CGF, Action, S);
6114 };
6115 llvm::Function *Fn;
6116 llvm::Constant *Addr;
6117 // Emit target region as a standalone region.
6118 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6119 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6120 assert(Fn && Addr && "Target device function emission failed.");
6121 }
6122
6123 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6124 const OMPTargetTeamsDirective &S) {
6125 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6126 emitTargetTeamsRegion(CGF, Action, S);
6127 };
6128 emitCommonOMPTargetDirective(*this, S, CodeGen);
6129 }
6130
6131 static void
6132 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6133 const OMPTargetTeamsDistributeDirective &S) {
6134 Action.Enter(CGF);
6135 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6136 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6137 };
6138
6139 // Emit teams region as a standalone region.
6140 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6141 PrePostActionTy &Action) {
6142 Action.Enter(CGF);
6143 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6144 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6145 (void)PrivateScope.Privatize();
6146 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6147 CodeGenDistribute);
6148 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6149 };
6150 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6151 emitPostUpdateForReductionClause(CGF, S,
6152 [](CodeGenFunction &) { return nullptr; });
6153 }
6154
6155 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6156 CodeGenModule &CGM, StringRef ParentName,
6157 const OMPTargetTeamsDistributeDirective &S) {
6158 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6159 emitTargetTeamsDistributeRegion(CGF, Action, S);
6160 };
6161 llvm::Function *Fn;
6162 llvm::Constant *Addr;
6163 // Emit target region as a standalone region.
6164 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6165 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6166 assert(Fn && Addr && "Target device function emission failed.");
6167 }
6168
6169 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6170 const OMPTargetTeamsDistributeDirective &S) {
6171 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6172 emitTargetTeamsDistributeRegion(CGF, Action, S);
6173 };
6174 emitCommonOMPTargetDirective(*this, S, CodeGen);
6175 }
6176
6177 static void emitTargetTeamsDistributeSimdRegion(
6178 CodeGenFunction &CGF, PrePostActionTy &Action,
6179 const OMPTargetTeamsDistributeSimdDirective &S) {
6180 Action.Enter(CGF);
6181 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6182 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6183 };
6184
6185 // Emit teams region as a standalone region.
6186 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6187 PrePostActionTy &Action) {
6188 Action.Enter(CGF);
6189 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6190 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6191 (void)PrivateScope.Privatize();
6192 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6193 CodeGenDistribute);
6194 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6195 };
6196 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6197 emitPostUpdateForReductionClause(CGF, S,
6198 [](CodeGenFunction &) { return nullptr; });
6199 }
6200
6201 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6202 CodeGenModule &CGM, StringRef ParentName,
6203 const OMPTargetTeamsDistributeSimdDirective &S) {
6204 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6205 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6206 };
6207 llvm::Function *Fn;
6208 llvm::Constant *Addr;
6209 // Emit target region as a standalone region.
6210 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6211 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6212 assert(Fn && Addr && "Target device function emission failed.");
6213 }
6214
6215 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6216 const OMPTargetTeamsDistributeSimdDirective &S) {
6217 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6218 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6219 };
6220 emitCommonOMPTargetDirective(*this, S, CodeGen);
6221 }
6222
6223 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6224 const OMPTeamsDistributeDirective &S) {
6225
6226 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6227 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6228 };
6229
6230 // Emit teams region as a standalone region.
6231 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6232 PrePostActionTy &Action) {
6233 Action.Enter(CGF);
6234 OMPPrivateScope PrivateScope(CGF);
6235 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6236 (void)PrivateScope.Privatize();
6237 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6238 CodeGenDistribute);
6239 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6240 };
6241 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6242 emitPostUpdateForReductionClause(*this, S,
6243 [](CodeGenFunction &) { return nullptr; });
6244 }
6245
6246 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6247 const OMPTeamsDistributeSimdDirective &S) {
6248 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6249 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6250 };
6251
6252 // Emit teams region as a standalone region.
6253 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6254 PrePostActionTy &Action) {
6255 Action.Enter(CGF);
6256 OMPPrivateScope PrivateScope(CGF);
6257 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6258 (void)PrivateScope.Privatize();
6259 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6260 CodeGenDistribute);
6261 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6262 };
6263 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
6264 emitPostUpdateForReductionClause(*this, S,
6265 [](CodeGenFunction &) { return nullptr; });
6266 }
6267
6268 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6269 const OMPTeamsDistributeParallelForDirective &S) {
6270 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6271 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6272 S.getDistInc());
6273 };
6274
6275 // Emit teams region as a standalone region.
6276 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6277 PrePostActionTy &Action) {
6278 Action.Enter(CGF);
6279 OMPPrivateScope PrivateScope(CGF);
6280 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6281 (void)PrivateScope.Privatize();
6282 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6283 CodeGenDistribute);
6284 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6285 };
6286 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
6287 emitPostUpdateForReductionClause(*this, S,
6288 [](CodeGenFunction &) { return nullptr; });
6289 }
6290
6291 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6292 const OMPTeamsDistributeParallelForSimdDirective &S) {
6293 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6294 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6295 S.getDistInc());
6296 };
6297
6298 // Emit teams region as a standalone region.
6299 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6300 PrePostActionTy &Action) {
6301 Action.Enter(CGF);
6302 OMPPrivateScope PrivateScope(CGF);
6303 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6304 (void)PrivateScope.Privatize();
6305 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6306 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6307 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6308 };
6309 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
6310 CodeGen);
6311 emitPostUpdateForReductionClause(*this, S,
6312 [](CodeGenFunction &) { return nullptr; });
6313 }
6314
6315 static void emitTargetTeamsDistributeParallelForRegion(
6316 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
6317 PrePostActionTy &Action) {
6318 Action.Enter(CGF);
6319 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6320 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6321 S.getDistInc());
6322 };
6323
6324 // Emit teams region as a standalone region.
6325 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6326 PrePostActionTy &Action) {
6327 Action.Enter(CGF);
6328 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6329 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6330 (void)PrivateScope.Privatize();
6331 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6332 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6333 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6334 };
6335
6336 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
6337 CodeGenTeams);
6338 emitPostUpdateForReductionClause(CGF, S,
6339 [](CodeGenFunction &) { return nullptr; });
6340 }
6341
6342 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
6343 CodeGenModule &CGM, StringRef ParentName,
6344 const OMPTargetTeamsDistributeParallelForDirective &S) {
6345 // Emit SPMD target teams distribute parallel for region as a standalone
6346 // region.
6347 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6348 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
6349 };
6350 llvm::Function *Fn;
6351 llvm::Constant *Addr;
6352 // Emit target region as a standalone region.
6353 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6354 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6355 assert(Fn && Addr && "Target device function emission failed.");
6356 }
6357
6358 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
6359 const OMPTargetTeamsDistributeParallelForDirective &S) {
6360 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6361 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
6362 };
6363 emitCommonOMPTargetDirective(*this, S, CodeGen);
6364 }
6365
6366 static void emitTargetTeamsDistributeParallelForSimdRegion(
6367 CodeGenFunction &CGF,
6368 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
6369 PrePostActionTy &Action) {
6370 Action.Enter(CGF);
6371 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6372 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6373 S.getDistInc());
6374 };
6375
6376 // Emit teams region as a standalone region.
6377 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6378 PrePostActionTy &Action) {
6379 Action.Enter(CGF);
6380 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6381 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6382 (void)PrivateScope.Privatize();
6383 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6384 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6385 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6386 };
6387
6388 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
6389 CodeGenTeams);
6390 emitPostUpdateForReductionClause(CGF, S,
6391 [](CodeGenFunction &) { return nullptr; });
6392 }
6393
6394 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
6395 CodeGenModule &CGM, StringRef ParentName,
6396 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
6397 // Emit SPMD target teams distribute parallel for simd region as a standalone
6398 // region.
6399 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6400 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
6401 };
6402 llvm::Function *Fn;
6403 llvm::Constant *Addr;
6404 // Emit target region as a standalone region.
6405 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6406 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6407 assert(Fn && Addr && "Target device function emission failed.");
6408 }
6409
6410 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
6411 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
6412 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6413 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
6414 };
6415 emitCommonOMPTargetDirective(*this, S, CodeGen);
6416 }
6417
6418 void CodeGenFunction::EmitOMPCancellationPointDirective(
6419 const OMPCancellationPointDirective &S) {
6420 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
6421 S.getCancelRegion());
6422 }
6423
6424 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
6425 const Expr *IfCond = nullptr;
6426 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6427 if (C->getNameModifier() == OMPD_unknown ||
6428 C->getNameModifier() == OMPD_cancel) {
6429 IfCond = C->getCondition();
6430 break;
6431 }
6432 }
6433 if (CGM.getLangOpts().OpenMPIRBuilder) {
6434 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6435     // TODO: This check is necessary because, for now, only `parallel`,
6436     // `sections`, and `section` cancellation goes through the OpenMPIRBuilder.
6437 if (S.getCancelRegion() == OMPD_parallel ||
6438 S.getCancelRegion() == OMPD_sections ||
6439 S.getCancelRegion() == OMPD_section) {
6440 llvm::Value *IfCondition = nullptr;
6441 if (IfCond)
6442 IfCondition = EmitScalarExpr(IfCond,
6443 /*IgnoreResultAssign=*/true);
6444 return Builder.restoreIP(
6445 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
6446 }
6447 }
6448
6449 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
6450 S.getCancelRegion());
6451 }
6452
6453 CodeGenFunction::JumpDest
6454 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
6455 if (Kind == OMPD_parallel || Kind == OMPD_task ||
6456 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
6457 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
6458 return ReturnBlock;
6459 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
6460 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
6461 Kind == OMPD_distribute_parallel_for ||
6462 Kind == OMPD_target_parallel_for ||
6463 Kind == OMPD_teams_distribute_parallel_for ||
6464 Kind == OMPD_target_teams_distribute_parallel_for);
6465 return OMPCancelStack.getExitBlock();
6466 }
6467
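// Privatize 'use_device_ptr' list items: rebind each original pointer
// variable to a private copy initialized with the device address the runtime
// reported in 'CaptureDeviceAddrMap'.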
6468 void CodeGenFunction::EmitOMPUseDevicePtrClause(
6469 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
6470 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6471 auto OrigVarIt = C.varlist_begin();
6472 auto InitIt = C.inits().begin();
6473 for (const Expr *PvtVarIt : C.private_copies()) {
6474 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
6475 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
6476 const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
6477
6478 // In order to identify the right initializer we need to match the
6479     // declaration used by the mapping logic. In some cases we may get an
6480     // OMPCapturedExprDecl that refers to the original declaration.
6481 const ValueDecl *MatchingVD = OrigVD;
6482 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6483       // OMPCapturedExprDecls are used to privatize fields of the current
6484       // structure.
6485 const auto *ME = cast<MemberExpr>(OED->getInit());
6486 assert(isa<CXXThisExpr>(ME->getBase()) &&
6487 "Base should be the current struct!");
6488 MatchingVD = ME->getMemberDecl();
6489 }
6490
6491 // If we don't have information about the current list item, move on to
6492 // the next one.
6493 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6494 if (InitAddrIt == CaptureDeviceAddrMap.end())
6495 continue;
6496
6497 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
6498 InitAddrIt, InitVD,
6499 PvtVD]() {
6500 // Initialize the temporary initialization variable with the address we
6501 // get from the runtime library. We have to cast the source address
6502 // because it is always a void *. References are materialized in the
6503 // privatization scope, so the initialization here disregards the fact
6504 // the original variable is a reference.
6505 QualType AddrQTy =
6506 getContext().getPointerType(OrigVD->getType().getNonReferenceType());
6507 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
6508 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
6509 setAddrOfLocalVar(InitVD, InitAddr);
6510
6511       // Emit the private declaration; it will be initialized by the
6512       // declaration we just added to the local declarations map.
6513 EmitDecl(*PvtVD);
6514
6515       // The initialization variable has served its purpose in the emission
6516       // of the previous declaration, so we don't need it anymore.
6517 LocalDeclMap.erase(InitVD);
6518
6519 // Return the address of the private variable.
6520 return GetAddrOfLocalVar(PvtVD);
6521 });
6522 assert(IsRegistered && "firstprivate var already registered as private");
6523 // Silence the warning about unused variable.
6524 (void)IsRegistered;
6525
6526 ++OrigVarIt;
6527 ++InitIt;
6528 }
6529 }
6530
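// Find the base variable of a 'use_device_addr' list item, looking through
// array sections and subscripts.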
6531 static const VarDecl *getBaseDecl(const Expr *Ref) {
6532 const Expr *Base = Ref->IgnoreParenImpCasts();
6533 while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
6534 Base = OASE->getBase()->IgnoreParenImpCasts();
6535 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
6536 Base = ASE->getBase()->IgnoreParenImpCasts();
6537 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
6538 }
6539
6540 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
6541 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
6542 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6543 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
6544 for (const Expr *Ref : C.varlists()) {
6545 const VarDecl *OrigVD = getBaseDecl(Ref);
6546 if (!Processed.insert(OrigVD).second)
6547 continue;
6548 // In order to identify the right initializer we need to match the
6549 // declaration used by the mapping logic. In some cases we may get
6550 // OMPCapturedExprDecl that refers to the original declaration.
6551 const ValueDecl *MatchingVD = OrigVD;
6552 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6553 // OMPCapturedExprDecl are used to privative fields of the current
6554 // structure.
6555 const auto *ME = cast<MemberExpr>(OED->getInit());
6556 assert(isa<CXXThisExpr>(ME->getBase()) &&
6557 "Base should be the current struct!");
6558 MatchingVD = ME->getMemberDecl();
6559 }
6560
6561 // If we don't have information about the current list item, move on to
6562 // the next one.
6563 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6564 if (InitAddrIt == CaptureDeviceAddrMap.end())
6565 continue;
6566
6567 Address PrivAddr = InitAddrIt->getSecond();
6568 // For declrefs and variable length array need to load the pointer for
6569 // correct mapping, since the pointer to the data was passed to the runtime.
6570 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
6571 MatchingVD->getType()->isArrayType())
6572 PrivAddr =
6573 EmitLoadOfPointer(PrivAddr, getContext()
6574 .getPointerType(OrigVD->getType())
6575 ->castAs<PointerType>());
6576 llvm::Type *RealTy =
6577 ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
6578 ->getPointerTo();
6579 PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
6580
6581 (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
6582 }
6583 }
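
// A hypothetical source form handled by the clause emission above, for
// illustration only (all names are made up):
//
//   int A[N];
//   #pragma omp target data map(tofrom: A) use_device_addr(A)
//   {
//     // Inside the region, references to 'A' designate the corresponding
//     // device storage rather than the host copy.
//     launch_kernel(&A[0]);
//   }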

// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code
  // generation to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers
      // if needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
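
// For illustration, a hypothetical directive lowered by the routine above:
//
//   #pragma omp target data map(tofrom: buf[0:n]) if(n > 128) device(0) \
//           use_device_ptr(buf)
//   { ... }
//
// With no offloading triples configured only the region body is emitted;
// otherwise the runtime's begin/end mapping calls wrap it.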

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
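
// Illustrative stand-alone form for the directive above (hypothetical):
//
//   #pragma omp target enter data map(to: x) map(alloc: tmp[0:n])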

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
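
// Illustrative stand-alone form for the directive above (hypothetical):
//
//   #pragma omp target exit data map(from: x) map(delete: tmp[0:n])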

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
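
// The helper above lowers, e.g., a hypothetical source such as
//
//   #pragma omp target parallel firstprivate(a) reduction(+: s)
//   { ... }
//
// by emitting a 'parallel' outlined function inside the target region.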

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
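
// For illustration, a hypothetical source lowered by the helper above:
//
//   #pragma omp target parallel for schedule(static)
//   for (int i = 0; i < n; ++i) { ... }
//
// The OMPCancelStackRAII accounts for a possible '#pragma omp cancel for'
// inside the loop body.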

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a helper variable to the address of the corresponding captured
/// implicit parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }
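
  // For reference, the two scheduling forms distinguished above correspond to
  // hypothetical sources such as:
  //
  //   #pragma omp taskloop grainsize(4)   // int flag in Schedule is false
  //   #pragma omp taskloop num_tasks(8)   // int flag in Schedule is true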

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}
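
// Note on the dispatch above: a hypothetical '#pragma omp taskloop nogroup'
// takes the first branch and skips the implicit taskgroup that otherwise
// wraps the generated taskloop call.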

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
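
// Illustrative stand-alone form for the directive above (hypothetical):
//
//   #pragma omp target update to(a) from(b[0:n]) if(cond) device(dev)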

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          // Use dyn_cast so that non-DeclRefExpr references are skipped
          // instead of asserting.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(
                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}