1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/Basic/OpenMPKinds.h"
25 #include "clang/Basic/PrettyStackTrace.h"
26 #include "llvm/Frontend/OpenMP/OMPConstants.h"
27 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/Support/AtomicOrdering.h"
31 using namespace clang;
32 using namespace CodeGen;
33 using namespace llvm::omp;
34
35 static const VarDecl *getBaseDecl(const Expr *Ref);
36
37 namespace {
38 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
39 /// for captured expressions.
40 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
emitPreInitStmt(CodeGenFunction & CGF,const OMPExecutableDirective & S)41 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
42 for (const auto *C : S.clauses()) {
43 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
44 if (const auto *PreInit =
45 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
46 for (const auto *I : PreInit->decls()) {
47 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
48 CGF.EmitVarDecl(cast<VarDecl>(*I));
49 } else {
50 CodeGenFunction::AutoVarEmission Emission =
51 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
52 CGF.EmitAutoVarCleanups(Emission);
53 }
54 }
55 }
56 }
57 }
58 }
59 CodeGenFunction::OMPPrivateScope InlinedShareds;
60
isCapturedVar(CodeGenFunction & CGF,const VarDecl * VD)61 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
62 return CGF.LambdaCaptureFields.lookup(VD) ||
63 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
64 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
65 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
66 }
67
68 public:
OMPLexicalScope(CodeGenFunction & CGF,const OMPExecutableDirective & S,const llvm::Optional<OpenMPDirectiveKind> CapturedRegion=llvm::None,const bool EmitPreInitStmt=true)69 OMPLexicalScope(
70 CodeGenFunction &CGF, const OMPExecutableDirective &S,
71 const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
72 const bool EmitPreInitStmt = true)
73 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
74 InlinedShareds(CGF) {
75 if (EmitPreInitStmt)
76 emitPreInitStmt(CGF, S);
77 if (!CapturedRegion.hasValue())
78 return;
79 assert(S.hasAssociatedStmt() &&
80 "Expected associated statement for inlined directive.");
81 const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
82 for (const auto &C : CS->captures()) {
83 if (C.capturesVariable() || C.capturesVariableByCopy()) {
84 auto *VD = C.getCapturedVar();
85 assert(VD == VD->getCanonicalDecl() &&
86 "Canonical decl must be captured.");
87 DeclRefExpr DRE(
88 CGF.getContext(), const_cast<VarDecl *>(VD),
89 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
90 InlinedShareds.isGlobalVarCaptured(VD)),
91 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
92 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
93 return CGF.EmitLValue(&DRE).getAddress(CGF);
94 });
95 }
96 }
97 (void)InlinedShareds.Privatize();
98 }
99 };
100
101 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
102 /// for captured expressions.
103 class OMPParallelScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)104 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
105 OpenMPDirectiveKind Kind = S.getDirectiveKind();
106 return !(isOpenMPTargetExecutionDirective(Kind) ||
107 isOpenMPLoopBoundSharingDirective(Kind)) &&
108 isOpenMPParallelDirective(Kind);
109 }
110
111 public:
OMPParallelScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)112 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
113 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
114 EmitPreInitStmt(S)) {}
115 };
116
117 /// Lexical scope for OpenMP teams construct, that handles correct codegen
118 /// for captured expressions.
119 class OMPTeamsScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)120 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
121 OpenMPDirectiveKind Kind = S.getDirectiveKind();
122 return !isOpenMPTargetExecutionDirective(Kind) &&
123 isOpenMPTeamsDirective(Kind);
124 }
125
126 public:
OMPTeamsScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)127 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
128 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
129 EmitPreInitStmt(S)) {}
130 };
131
132 /// Private scope for OpenMP loop-based directives, that supports capturing
133 /// of used expression from loop statement.
134 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
emitPreInitStmt(CodeGenFunction & CGF,const OMPLoopDirective & S)135 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
136 CodeGenFunction::OMPMapVars PreCondVars;
137 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
138 for (const auto *E : S.counters()) {
139 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
140 EmittedAsPrivate.insert(VD->getCanonicalDecl());
141 (void)PreCondVars.setVarAddr(
142 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
143 }
144 // Mark private vars as undefs.
145 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
146 for (const Expr *IRef : C->varlists()) {
147 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
148 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
149 (void)PreCondVars.setVarAddr(
150 CGF, OrigVD,
151 Address(llvm::UndefValue::get(
152 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
153 OrigVD->getType().getNonReferenceType()))),
154 CGF.getContext().getDeclAlign(OrigVD)));
155 }
156 }
157 }
158 (void)PreCondVars.apply(CGF);
159 // Emit init, __range and __end variables for C++ range loops.
160 const Stmt *Body =
161 S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
162 for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
163 Body = OMPLoopDirective::tryToFindNextInnerLoop(
164 Body, /*TryImperfectlyNestedLoops=*/true);
165 if (auto *For = dyn_cast<ForStmt>(Body)) {
166 Body = For->getBody();
167 } else {
168 assert(isa<CXXForRangeStmt>(Body) &&
169 "Expected canonical for loop or range-based for loop.");
170 auto *CXXFor = cast<CXXForRangeStmt>(Body);
171 if (const Stmt *Init = CXXFor->getInit())
172 CGF.EmitStmt(Init);
173 CGF.EmitStmt(CXXFor->getRangeStmt());
174 CGF.EmitStmt(CXXFor->getEndStmt());
175 Body = CXXFor->getBody();
176 }
177 }
178 if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
179 for (const auto *I : PreInits->decls())
180 CGF.EmitVarDecl(cast<VarDecl>(*I));
181 }
182 PreCondVars.restore(CGF);
183 }
184
185 public:
OMPLoopScope(CodeGenFunction & CGF,const OMPLoopDirective & S)186 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
187 : CodeGenFunction::RunCleanupsScope(CGF) {
188 emitPreInitStmt(CGF, S);
189 }
190 };
191
192 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
193 CodeGenFunction::OMPPrivateScope InlinedShareds;
194
isCapturedVar(CodeGenFunction & CGF,const VarDecl * VD)195 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
196 return CGF.LambdaCaptureFields.lookup(VD) ||
197 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
198 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
199 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
200 }
201
202 public:
OMPSimdLexicalScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)203 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
204 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
205 InlinedShareds(CGF) {
206 for (const auto *C : S.clauses()) {
207 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
208 if (const auto *PreInit =
209 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
210 for (const auto *I : PreInit->decls()) {
211 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
212 CGF.EmitVarDecl(cast<VarDecl>(*I));
213 } else {
214 CodeGenFunction::AutoVarEmission Emission =
215 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
216 CGF.EmitAutoVarCleanups(Emission);
217 }
218 }
219 }
220 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
221 for (const Expr *E : UDP->varlists()) {
222 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
223 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
224 CGF.EmitVarDecl(*OED);
225 }
226 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
227 for (const Expr *E : UDP->varlists()) {
228 const Decl *D = getBaseDecl(E);
229 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
230 CGF.EmitVarDecl(*OED);
231 }
232 }
233 }
234 if (!isOpenMPSimdDirective(S.getDirectiveKind()))
235 CGF.EmitOMPPrivateClause(S, InlinedShareds);
236 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
237 if (const Expr *E = TG->getReductionRef())
238 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
239 }
240 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
241 while (CS) {
242 for (auto &C : CS->captures()) {
243 if (C.capturesVariable() || C.capturesVariableByCopy()) {
244 auto *VD = C.getCapturedVar();
245 assert(VD == VD->getCanonicalDecl() &&
246 "Canonical decl must be captured.");
247 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
248 isCapturedVar(CGF, VD) ||
249 (CGF.CapturedStmtInfo &&
250 InlinedShareds.isGlobalVarCaptured(VD)),
251 VD->getType().getNonReferenceType(), VK_LValue,
252 C.getLocation());
253 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
254 return CGF.EmitLValue(&DRE).getAddress(CGF);
255 });
256 }
257 }
258 CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
259 }
260 (void)InlinedShareds.Privatize();
261 }
262 };
263
264 } // namespace
265
266 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
267 const OMPExecutableDirective &S,
268 const RegionCodeGenTy &CodeGen);
269
EmitOMPSharedLValue(const Expr * E)270 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
271 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
272 if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
273 OrigVD = OrigVD->getCanonicalDecl();
274 bool IsCaptured =
275 LambdaCaptureFields.lookup(OrigVD) ||
276 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
277 (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
278 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
279 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
280 return EmitLValue(&DRE);
281 }
282 }
283 return EmitLValue(E);
284 }
285
getTypeSize(QualType Ty)286 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
287 ASTContext &C = getContext();
288 llvm::Value *Size = nullptr;
289 auto SizeInChars = C.getTypeSizeInChars(Ty);
290 if (SizeInChars.isZero()) {
291 // getTypeSizeInChars() returns 0 for a VLA.
292 while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
293 VlaSizePair VlaSize = getVLASize(VAT);
294 Ty = VlaSize.Type;
295 Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
296 : VlaSize.NumElts;
297 }
298 SizeInChars = C.getTypeSizeInChars(Ty);
299 if (SizeInChars.isZero())
300 return llvm::ConstantInt::get(SizeTy, /*V=*/0);
301 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
302 }
303 return CGM.getSize(SizeInChars);
304 }
305
GenerateOpenMPCapturedVars(const CapturedStmt & S,SmallVectorImpl<llvm::Value * > & CapturedVars)306 void CodeGenFunction::GenerateOpenMPCapturedVars(
307 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
308 const RecordDecl *RD = S.getCapturedRecordDecl();
309 auto CurField = RD->field_begin();
310 auto CurCap = S.captures().begin();
311 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
312 E = S.capture_init_end();
313 I != E; ++I, ++CurField, ++CurCap) {
314 if (CurField->hasCapturedVLAType()) {
315 const VariableArrayType *VAT = CurField->getCapturedVLAType();
316 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
317 CapturedVars.push_back(Val);
318 } else if (CurCap->capturesThis()) {
319 CapturedVars.push_back(CXXThisValue);
320 } else if (CurCap->capturesVariableByCopy()) {
321 llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
322
323 // If the field is not a pointer, we need to save the actual value
324 // and load it as a void pointer.
325 if (!CurField->getType()->isAnyPointerType()) {
326 ASTContext &Ctx = getContext();
327 Address DstAddr = CreateMemTemp(
328 Ctx.getUIntPtrType(),
329 Twine(CurCap->getCapturedVar()->getName(), ".casted"));
330 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
331
332 llvm::Value *SrcAddrVal = EmitScalarConversion(
333 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
334 Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
335 LValue SrcLV =
336 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
337
338 // Store the value using the source type pointer.
339 EmitStoreThroughLValue(RValue::get(CV), SrcLV);
340
341 // Load the value using the destination type pointer.
342 CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
343 }
344 CapturedVars.push_back(CV);
345 } else {
346 assert(CurCap->capturesVariable() && "Expected capture by reference.");
347 CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
348 }
349 }
350 }
351
castValueFromUintptr(CodeGenFunction & CGF,SourceLocation Loc,QualType DstType,StringRef Name,LValue AddrLV)352 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
353 QualType DstType, StringRef Name,
354 LValue AddrLV) {
355 ASTContext &Ctx = CGF.getContext();
356
357 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
358 AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
359 Ctx.getPointerType(DstType), Loc);
360 Address TmpAddr =
361 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
362 .getAddress(CGF);
363 return TmpAddr;
364 }
365
getCanonicalParamType(ASTContext & C,QualType T)366 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
367 if (T->isLValueReferenceType())
368 return C.getLValueReferenceType(
369 getCanonicalParamType(C, T.getNonReferenceType()),
370 /*SpelledAsLValue=*/false);
371 if (T->isPointerType())
372 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
373 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
374 if (const auto *VLA = dyn_cast<VariableArrayType>(A))
375 return getCanonicalParamType(C, VLA->getElementType());
376 if (!A->isVariablyModifiedType())
377 return C.getCanonicalType(T);
378 }
379 return C.getCanonicalParamType(T);
380 }
381
382 namespace {
383 /// Contains required data for proper outlined function codegen.
384 struct FunctionOptions {
385 /// Captured statement for which the function is generated.
386 const CapturedStmt *S = nullptr;
387 /// true if cast to/from UIntPtr is required for variables captured by
388 /// value.
389 const bool UIntPtrCastRequired = true;
390 /// true if only casted arguments must be registered as local args or VLA
391 /// sizes.
392 const bool RegisterCastedArgsOnly = false;
393 /// Name of the generated function.
394 const StringRef FunctionName;
395 /// Location of the non-debug version of the outlined function.
396 SourceLocation Loc;
FunctionOptions__anoncd1c19710411::FunctionOptions397 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
398 bool RegisterCastedArgsOnly, StringRef FunctionName,
399 SourceLocation Loc)
400 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
401 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
402 FunctionName(FunctionName), Loc(Loc) {}
403 };
404 } // namespace
405
emitOutlinedFunctionPrologue(CodeGenFunction & CGF,FunctionArgList & Args,llvm::MapVector<const Decl *,std::pair<const VarDecl *,Address>> & LocalAddrs,llvm::DenseMap<const Decl *,std::pair<const Expr *,llvm::Value * >> & VLASizes,llvm::Value * & CXXThisValue,const FunctionOptions & FO)406 static llvm::Function *emitOutlinedFunctionPrologue(
407 CodeGenFunction &CGF, FunctionArgList &Args,
408 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
409 &LocalAddrs,
410 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
411 &VLASizes,
412 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
413 const CapturedDecl *CD = FO.S->getCapturedDecl();
414 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
415 assert(CD->hasBody() && "missing CapturedDecl body");
416
417 CXXThisValue = nullptr;
418 // Build the argument list.
419 CodeGenModule &CGM = CGF.CGM;
420 ASTContext &Ctx = CGM.getContext();
421 FunctionArgList TargetArgs;
422 Args.append(CD->param_begin(),
423 std::next(CD->param_begin(), CD->getContextParamPosition()));
424 TargetArgs.append(
425 CD->param_begin(),
426 std::next(CD->param_begin(), CD->getContextParamPosition()));
427 auto I = FO.S->captures().begin();
428 FunctionDecl *DebugFunctionDecl = nullptr;
429 if (!FO.UIntPtrCastRequired) {
430 FunctionProtoType::ExtProtoInfo EPI;
431 QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
432 DebugFunctionDecl = FunctionDecl::Create(
433 Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
434 SourceLocation(), DeclarationName(), FunctionTy,
435 Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
436 /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
437 }
438 for (const FieldDecl *FD : RD->fields()) {
439 QualType ArgType = FD->getType();
440 IdentifierInfo *II = nullptr;
441 VarDecl *CapVar = nullptr;
442
443 // If this is a capture by copy and the type is not a pointer, the outlined
444 // function argument type should be uintptr and the value properly casted to
445 // uintptr. This is necessary given that the runtime library is only able to
446 // deal with pointers. We can pass in the same way the VLA type sizes to the
447 // outlined function.
448 if (FO.UIntPtrCastRequired &&
449 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
450 I->capturesVariableArrayType()))
451 ArgType = Ctx.getUIntPtrType();
452
453 if (I->capturesVariable() || I->capturesVariableByCopy()) {
454 CapVar = I->getCapturedVar();
455 II = CapVar->getIdentifier();
456 } else if (I->capturesThis()) {
457 II = &Ctx.Idents.get("this");
458 } else {
459 assert(I->capturesVariableArrayType());
460 II = &Ctx.Idents.get("vla");
461 }
462 if (ArgType->isVariablyModifiedType())
463 ArgType = getCanonicalParamType(Ctx, ArgType);
464 VarDecl *Arg;
465 if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
466 Arg = ParmVarDecl::Create(
467 Ctx, DebugFunctionDecl,
468 CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
469 CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
470 /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
471 } else {
472 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
473 II, ArgType, ImplicitParamDecl::Other);
474 }
475 Args.emplace_back(Arg);
476 // Do not cast arguments if we emit function with non-original types.
477 TargetArgs.emplace_back(
478 FO.UIntPtrCastRequired
479 ? Arg
480 : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
481 ++I;
482 }
483 Args.append(
484 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
485 CD->param_end());
486 TargetArgs.append(
487 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
488 CD->param_end());
489
490 // Create the function declaration.
491 const CGFunctionInfo &FuncInfo =
492 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
493 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
494
495 auto *F =
496 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
497 FO.FunctionName, &CGM.getModule());
498 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
499 if (CD->isNothrow())
500 F->setDoesNotThrow();
501 F->setDoesNotRecurse();
502
503 // Generate the function.
504 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
505 FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
506 FO.UIntPtrCastRequired ? FO.Loc
507 : CD->getBody()->getBeginLoc());
508 unsigned Cnt = CD->getContextParamPosition();
509 I = FO.S->captures().begin();
510 for (const FieldDecl *FD : RD->fields()) {
511 // Do not map arguments if we emit function with non-original types.
512 Address LocalAddr(Address::invalid());
513 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
514 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
515 TargetArgs[Cnt]);
516 } else {
517 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
518 }
519 // If we are capturing a pointer by copy we don't need to do anything, just
520 // use the value that we get from the arguments.
521 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
522 const VarDecl *CurVD = I->getCapturedVar();
523 if (!FO.RegisterCastedArgsOnly)
524 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
525 ++Cnt;
526 ++I;
527 continue;
528 }
529
530 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
531 AlignmentSource::Decl);
532 if (FD->hasCapturedVLAType()) {
533 if (FO.UIntPtrCastRequired) {
534 ArgLVal = CGF.MakeAddrLValue(
535 castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
536 Args[Cnt]->getName(), ArgLVal),
537 FD->getType(), AlignmentSource::Decl);
538 }
539 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
540 const VariableArrayType *VAT = FD->getCapturedVLAType();
541 VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
542 } else if (I->capturesVariable()) {
543 const VarDecl *Var = I->getCapturedVar();
544 QualType VarTy = Var->getType();
545 Address ArgAddr = ArgLVal.getAddress(CGF);
546 if (ArgLVal.getType()->isLValueReferenceType()) {
547 ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
548 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
549 assert(ArgLVal.getType()->isPointerType());
550 ArgAddr = CGF.EmitLoadOfPointer(
551 ArgAddr, ArgLVal.getType()->castAs<PointerType>());
552 }
553 if (!FO.RegisterCastedArgsOnly) {
554 LocalAddrs.insert(
555 {Args[Cnt],
556 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
557 }
558 } else if (I->capturesVariableByCopy()) {
559 assert(!FD->getType()->isAnyPointerType() &&
560 "Not expecting a captured pointer.");
561 const VarDecl *Var = I->getCapturedVar();
562 LocalAddrs.insert({Args[Cnt],
563 {Var, FO.UIntPtrCastRequired
564 ? castValueFromUintptr(
565 CGF, I->getLocation(), FD->getType(),
566 Args[Cnt]->getName(), ArgLVal)
567 : ArgLVal.getAddress(CGF)}});
568 } else {
569 // If 'this' is captured, load it into CXXThisValue.
570 assert(I->capturesThis());
571 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
572 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
573 }
574 ++Cnt;
575 ++I;
576 }
577
578 return F;
579 }
580
581 llvm::Function *
GenerateOpenMPCapturedStmtFunction(const CapturedStmt & S,SourceLocation Loc)582 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
583 SourceLocation Loc) {
584 assert(
585 CapturedStmtInfo &&
586 "CapturedStmtInfo should be set when generating the captured function");
587 const CapturedDecl *CD = S.getCapturedDecl();
588 // Build the argument list.
589 bool NeedWrapperFunction =
590 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
591 FunctionArgList Args;
592 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
593 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
594 SmallString<256> Buffer;
595 llvm::raw_svector_ostream Out(Buffer);
596 Out << CapturedStmtInfo->getHelperName();
597 if (NeedWrapperFunction)
598 Out << "_debug__";
599 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
600 Out.str(), Loc);
601 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
602 VLASizes, CXXThisValue, FO);
603 CodeGenFunction::OMPPrivateScope LocalScope(*this);
604 for (const auto &LocalAddrPair : LocalAddrs) {
605 if (LocalAddrPair.second.first) {
606 LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
607 return LocalAddrPair.second.second;
608 });
609 }
610 }
611 (void)LocalScope.Privatize();
612 for (const auto &VLASizePair : VLASizes)
613 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
614 PGO.assignRegionCounters(GlobalDecl(CD), F);
615 CapturedStmtInfo->EmitBody(*this, CD->getBody());
616 (void)LocalScope.ForceCleanup();
617 FinishFunction(CD->getBodyRBrace());
618 if (!NeedWrapperFunction)
619 return F;
620
621 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
622 /*RegisterCastedArgsOnly=*/true,
623 CapturedStmtInfo->getHelperName(), Loc);
624 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
625 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
626 Args.clear();
627 LocalAddrs.clear();
628 VLASizes.clear();
629 llvm::Function *WrapperF =
630 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
631 WrapperCGF.CXXThisValue, WrapperFO);
632 llvm::SmallVector<llvm::Value *, 4> CallArgs;
633 for (const auto *Arg : Args) {
634 llvm::Value *CallArg;
635 auto I = LocalAddrs.find(Arg);
636 if (I != LocalAddrs.end()) {
637 LValue LV = WrapperCGF.MakeAddrLValue(
638 I->second.second,
639 I->second.first ? I->second.first->getType() : Arg->getType(),
640 AlignmentSource::Decl);
641 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
642 } else {
643 auto EI = VLASizes.find(Arg);
644 if (EI != VLASizes.end()) {
645 CallArg = EI->second.second;
646 } else {
647 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
648 Arg->getType(),
649 AlignmentSource::Decl);
650 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
651 }
652 }
653 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
654 }
655 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
656 WrapperCGF.FinishFunction();
657 return WrapperF;
658 }
659
660 //===----------------------------------------------------------------------===//
661 // OpenMP Directive Emission
662 //===----------------------------------------------------------------------===//
EmitOMPAggregateAssign(Address DestAddr,Address SrcAddr,QualType OriginalType,const llvm::function_ref<void (Address,Address)> CopyGen)663 void CodeGenFunction::EmitOMPAggregateAssign(
664 Address DestAddr, Address SrcAddr, QualType OriginalType,
665 const llvm::function_ref<void(Address, Address)> CopyGen) {
666 // Perform element-by-element initialization.
667 QualType ElementTy;
668
669 // Drill down to the base element type on both arrays.
670 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
671 llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
672 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
673
674 llvm::Value *SrcBegin = SrcAddr.getPointer();
675 llvm::Value *DestBegin = DestAddr.getPointer();
676 // Cast from pointer to array type to pointer to single element.
677 llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
678 // The basic structure here is a while-do loop.
679 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
680 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
681 llvm::Value *IsEmpty =
682 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
683 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
684
685 // Enter the loop body, making that address the current address.
686 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
687 EmitBlock(BodyBB);
688
689 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
690
691 llvm::PHINode *SrcElementPHI =
692 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
693 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
694 Address SrcElementCurrent =
695 Address(SrcElementPHI,
696 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
697
698 llvm::PHINode *DestElementPHI =
699 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
700 DestElementPHI->addIncoming(DestBegin, EntryBB);
701 Address DestElementCurrent =
702 Address(DestElementPHI,
703 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
704
705 // Emit copy.
706 CopyGen(DestElementCurrent, SrcElementCurrent);
707
708 // Shift the address forward by one element.
709 llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
710 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
711 llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
712 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
713 // Check whether we've reached the end.
714 llvm::Value *Done =
715 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
716 Builder.CreateCondBr(Done, DoneBB, BodyBB);
717 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
718 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
719
720 // Done.
721 EmitBlock(DoneBB, /*IsFinished=*/true);
722 }
723
EmitOMPCopy(QualType OriginalType,Address DestAddr,Address SrcAddr,const VarDecl * DestVD,const VarDecl * SrcVD,const Expr * Copy)724 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
725 Address SrcAddr, const VarDecl *DestVD,
726 const VarDecl *SrcVD, const Expr *Copy) {
727 if (OriginalType->isArrayType()) {
728 const auto *BO = dyn_cast<BinaryOperator>(Copy);
729 if (BO && BO->getOpcode() == BO_Assign) {
730 // Perform simple memcpy for simple copying.
731 LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
732 LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
733 EmitAggregateAssign(Dest, Src, OriginalType);
734 } else {
735 // For arrays with complex element types perform element by element
736 // copying.
737 EmitOMPAggregateAssign(
738 DestAddr, SrcAddr, OriginalType,
739 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
740 // Working with the single array element, so have to remap
741 // destination and source variables to corresponding array
742 // elements.
743 CodeGenFunction::OMPPrivateScope Remap(*this);
744 Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
745 Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
746 (void)Remap.Privatize();
747 EmitIgnoredExpr(Copy);
748 });
749 }
750 } else {
751 // Remap pseudo source variable to private copy.
752 CodeGenFunction::OMPPrivateScope Remap(*this);
753 Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
754 Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
755 (void)Remap.Privatize();
756 // Emit copying of the whole variable.
757 EmitIgnoredExpr(Copy);
758 }
759 }
760
EmitOMPFirstprivateClause(const OMPExecutableDirective & D,OMPPrivateScope & PrivateScope)761 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
762 OMPPrivateScope &PrivateScope) {
763 if (!HaveInsertPoint())
764 return false;
765 bool DeviceConstTarget =
766 getLangOpts().OpenMPIsDevice &&
767 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
768 bool FirstprivateIsLastprivate = false;
769 llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
770 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
771 for (const auto *D : C->varlists())
772 Lastprivates.try_emplace(
773 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
774 C->getKind());
775 }
776 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
777 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
778 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
779 // Force emission of the firstprivate copy if the directive does not emit
780 // outlined function, like omp for, omp simd, omp distribute etc.
781 bool MustEmitFirstprivateCopy =
782 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
783 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
784 const auto *IRef = C->varlist_begin();
785 const auto *InitsRef = C->inits().begin();
786 for (const Expr *IInit : C->private_copies()) {
787 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
788 bool ThisFirstprivateIsLastprivate =
789 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
790 const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
791 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
792 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
793 !FD->getType()->isReferenceType() &&
794 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
795 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
796 ++IRef;
797 ++InitsRef;
798 continue;
799 }
800 // Do not emit copy for firstprivate constant variables in target regions,
801 // captured by reference.
802 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
803 FD && FD->getType()->isReferenceType() &&
804 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
805 (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
806 OrigVD);
807 ++IRef;
808 ++InitsRef;
809 continue;
810 }
811 FirstprivateIsLastprivate =
812 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
813 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
814 const auto *VDInit =
815 cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
816 bool IsRegistered;
817 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
818 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
819 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
820 LValue OriginalLVal;
821 if (!FD) {
822 // Check if the firstprivate variable is just a constant value.
823 ConstantEmission CE = tryEmitAsConstant(&DRE);
824 if (CE && !CE.isReference()) {
825 // Constant value, no need to create a copy.
826 ++IRef;
827 ++InitsRef;
828 continue;
829 }
830 if (CE && CE.isReference()) {
831 OriginalLVal = CE.getReferenceLValue(*this, &DRE);
832 } else {
833 assert(!CE && "Expected non-constant firstprivate.");
834 OriginalLVal = EmitLValue(&DRE);
835 }
836 } else {
837 OriginalLVal = EmitLValue(&DRE);
838 }
839 QualType Type = VD->getType();
840 if (Type->isArrayType()) {
841 // Emit VarDecl with copy init for arrays.
842 // Get the address of the original variable captured in current
843 // captured region.
844 IsRegistered = PrivateScope.addPrivate(
845 OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
846 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
847 const Expr *Init = VD->getInit();
848 if (!isa<CXXConstructExpr>(Init) ||
849 isTrivialInitializer(Init)) {
850 // Perform simple memcpy.
851 LValue Dest =
852 MakeAddrLValue(Emission.getAllocatedAddress(), Type);
853 EmitAggregateAssign(Dest, OriginalLVal, Type);
854 } else {
855 EmitOMPAggregateAssign(
856 Emission.getAllocatedAddress(),
857 OriginalLVal.getAddress(*this), Type,
858 [this, VDInit, Init](Address DestElement,
859 Address SrcElement) {
860 // Clean up any temporaries needed by the
861 // initialization.
862 RunCleanupsScope InitScope(*this);
863 // Emit initialization for single element.
864 setAddrOfLocalVar(VDInit, SrcElement);
865 EmitAnyExprToMem(Init, DestElement,
866 Init->getType().getQualifiers(),
867 /*IsInitializer*/ false);
868 LocalDeclMap.erase(VDInit);
869 });
870 }
871 EmitAutoVarCleanups(Emission);
872 return Emission.getAllocatedAddress();
873 });
874 } else {
875 Address OriginalAddr = OriginalLVal.getAddress(*this);
876 IsRegistered =
877 PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
878 ThisFirstprivateIsLastprivate,
879 OrigVD, &Lastprivates, IRef]() {
880 // Emit private VarDecl with copy init.
881 // Remap temp VDInit variable to the address of the original
882 // variable (for proper handling of captured global variables).
883 setAddrOfLocalVar(VDInit, OriginalAddr);
884 EmitDecl(*VD);
885 LocalDeclMap.erase(VDInit);
886 if (ThisFirstprivateIsLastprivate &&
887 Lastprivates[OrigVD->getCanonicalDecl()] ==
888 OMPC_LASTPRIVATE_conditional) {
889 // Create/init special variable for lastprivate conditionals.
890 Address VDAddr =
891 CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
892 *this, OrigVD);
893 llvm::Value *V = EmitLoadOfScalar(
894 MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
895 AlignmentSource::Decl),
896 (*IRef)->getExprLoc());
897 EmitStoreOfScalar(V,
898 MakeAddrLValue(VDAddr, (*IRef)->getType(),
899 AlignmentSource::Decl));
900 LocalDeclMap.erase(VD);
901 setAddrOfLocalVar(VD, VDAddr);
902 return VDAddr;
903 }
904 return GetAddrOfLocalVar(VD);
905 });
906 }
907 assert(IsRegistered &&
908 "firstprivate var already registered as private");
909 // Silence the warning about unused variable.
910 (void)IsRegistered;
911 }
912 ++IRef;
913 ++InitsRef;
914 }
915 }
916 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
917 }
918
EmitOMPPrivateClause(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope)919 void CodeGenFunction::EmitOMPPrivateClause(
920 const OMPExecutableDirective &D,
921 CodeGenFunction::OMPPrivateScope &PrivateScope) {
922 if (!HaveInsertPoint())
923 return;
924 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
925 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
926 auto IRef = C->varlist_begin();
927 for (const Expr *IInit : C->private_copies()) {
928 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
929 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
930 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
931 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
932 // Emit private VarDecl with copy init.
933 EmitDecl(*VD);
934 return GetAddrOfLocalVar(VD);
935 });
936 assert(IsRegistered && "private var already registered as private");
937 // Silence the warning about unused variable.
938 (void)IsRegistered;
939 }
940 ++IRef;
941 }
942 }
943 }
944
EmitOMPCopyinClause(const OMPExecutableDirective & D)945 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
946 if (!HaveInsertPoint())
947 return false;
948 // threadprivate_var1 = master_threadprivate_var1;
949 // operator=(threadprivate_var2, master_threadprivate_var2);
950 // ...
951 // __kmpc_barrier(&loc, global_tid);
952 llvm::DenseSet<const VarDecl *> CopiedVars;
953 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
954 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
955 auto IRef = C->varlist_begin();
956 auto ISrcRef = C->source_exprs().begin();
957 auto IDestRef = C->destination_exprs().begin();
958 for (const Expr *AssignOp : C->assignment_ops()) {
959 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
960 QualType Type = VD->getType();
961 if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
962 // Get the address of the master variable. If we are emitting code with
963 // TLS support, the address is passed from the master as field in the
964 // captured declaration.
965 Address MasterAddr = Address::invalid();
966 if (getLangOpts().OpenMPUseTLS &&
967 getContext().getTargetInfo().isTLSSupported()) {
968 assert(CapturedStmtInfo->lookup(VD) &&
969 "Copyin threadprivates should have been captured!");
970 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
971 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
972 MasterAddr = EmitLValue(&DRE).getAddress(*this);
973 LocalDeclMap.erase(VD);
974 } else {
975 MasterAddr =
976 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
977 : CGM.GetAddrOfGlobal(VD),
978 getContext().getDeclAlign(VD));
979 }
980 // Get the address of the threadprivate variable.
981 Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
982 if (CopiedVars.size() == 1) {
983 // At first check if current thread is a master thread. If it is, no
984 // need to copy data.
985 CopyBegin = createBasicBlock("copyin.not.master");
986 CopyEnd = createBasicBlock("copyin.not.master.end");
987 Builder.CreateCondBr(
988 Builder.CreateICmpNE(
989 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
990 Builder.CreatePtrToInt(PrivateAddr.getPointer(),
991 CGM.IntPtrTy)),
992 CopyBegin, CopyEnd);
993 EmitBlock(CopyBegin);
994 }
995 const auto *SrcVD =
996 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
997 const auto *DestVD =
998 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
999 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1000 }
1001 ++IRef;
1002 ++ISrcRef;
1003 ++IDestRef;
1004 }
1005 }
1006 if (CopyEnd) {
1007 // Exit out of copying procedure for non-master thread.
1008 EmitBlock(CopyEnd, /*IsFinished=*/true);
1009 return true;
1010 }
1011 return false;
1012 }
1013
EmitOMPLastprivateClauseInit(const OMPExecutableDirective & D,OMPPrivateScope & PrivateScope)1014 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1015 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1016 if (!HaveInsertPoint())
1017 return false;
1018 bool HasAtLeastOneLastprivate = false;
1019 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1020 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1021 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1022 for (const Expr *C : LoopDirective->counters()) {
1023 SIMDLCVs.insert(
1024 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1025 }
1026 }
1027 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1028 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1029 HasAtLeastOneLastprivate = true;
1030 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1031 !getLangOpts().OpenMPSimd)
1032 break;
1033 const auto *IRef = C->varlist_begin();
1034 const auto *IDestRef = C->destination_exprs().begin();
1035 for (const Expr *IInit : C->private_copies()) {
1036 // Keep the address of the original variable for future update at the end
1037 // of the loop.
1038 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1039 // Taskloops do not require additional initialization, it is done in
1040 // runtime support library.
1041 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1042 const auto *DestVD =
1043 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1044 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
1045 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1046 /*RefersToEnclosingVariableOrCapture=*/
1047 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1048 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1049 return EmitLValue(&DRE).getAddress(*this);
1050 });
1051 // Check if the variable is also a firstprivate: in this case IInit is
1052 // not generated. Initialization of this variable will happen in codegen
1053 // for 'firstprivate' clause.
1054 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1055 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1056 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
1057 OrigVD]() {
1058 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1059 Address VDAddr =
1060 CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
1061 OrigVD);
1062 setAddrOfLocalVar(VD, VDAddr);
1063 return VDAddr;
1064 }
1065 // Emit private VarDecl with copy init.
1066 EmitDecl(*VD);
1067 return GetAddrOfLocalVar(VD);
1068 });
1069 assert(IsRegistered &&
1070 "lastprivate var already registered as private");
1071 (void)IsRegistered;
1072 }
1073 }
1074 ++IRef;
1075 ++IDestRef;
1076 }
1077 }
1078 return HasAtLeastOneLastprivate;
1079 }
1080
EmitOMPLastprivateClauseFinal(const OMPExecutableDirective & D,bool NoFinals,llvm::Value * IsLastIterCond)1081 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1082 const OMPExecutableDirective &D, bool NoFinals,
1083 llvm::Value *IsLastIterCond) {
1084 if (!HaveInsertPoint())
1085 return;
1086 // Emit following code:
1087 // if (<IsLastIterCond>) {
1088 // orig_var1 = private_orig_var1;
1089 // ...
1090 // orig_varn = private_orig_varn;
1091 // }
1092 llvm::BasicBlock *ThenBB = nullptr;
1093 llvm::BasicBlock *DoneBB = nullptr;
1094 if (IsLastIterCond) {
1095 // Emit implicit barrier if at least one lastprivate conditional is found
1096 // and this is not a simd mode.
1097 if (!getLangOpts().OpenMPSimd &&
1098 llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1099 [](const OMPLastprivateClause *C) {
1100 return C->getKind() == OMPC_LASTPRIVATE_conditional;
1101 })) {
1102 CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1103 OMPD_unknown,
1104 /*EmitChecks=*/false,
1105 /*ForceSimpleCall=*/true);
1106 }
1107 ThenBB = createBasicBlock(".omp.lastprivate.then");
1108 DoneBB = createBasicBlock(".omp.lastprivate.done");
1109 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1110 EmitBlock(ThenBB);
1111 }
1112 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1113 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1114 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1115 auto IC = LoopDirective->counters().begin();
1116 for (const Expr *F : LoopDirective->finals()) {
1117 const auto *D =
1118 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1119 if (NoFinals)
1120 AlreadyEmittedVars.insert(D);
1121 else
1122 LoopCountersAndUpdates[D] = F;
1123 ++IC;
1124 }
1125 }
1126 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1127 auto IRef = C->varlist_begin();
1128 auto ISrcRef = C->source_exprs().begin();
1129 auto IDestRef = C->destination_exprs().begin();
1130 for (const Expr *AssignOp : C->assignment_ops()) {
1131 const auto *PrivateVD =
1132 cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1133 QualType Type = PrivateVD->getType();
1134 const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1135 if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1136 // If lastprivate variable is a loop control variable for loop-based
1137 // directive, update its value before copyin back to original
1138 // variable.
1139 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1140 EmitIgnoredExpr(FinalExpr);
1141 const auto *SrcVD =
1142 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1143 const auto *DestVD =
1144 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1145 // Get the address of the private variable.
1146 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1147 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1148 PrivateAddr =
1149 Address(Builder.CreateLoad(PrivateAddr),
1150 CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1151 // Store the last value to the private copy in the last iteration.
1152 if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1153 CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1154 *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1155 (*IRef)->getExprLoc());
1156 // Get the address of the original variable.
1157 Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1158 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1159 }
1160 ++IRef;
1161 ++ISrcRef;
1162 ++IDestRef;
1163 }
1164 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1165 EmitIgnoredExpr(PostUpdate);
1166 }
1167 if (IsLastIterCond)
1168 EmitBlock(DoneBB, /*IsFinished=*/true);
1169 }
1170
EmitOMPReductionClauseInit(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope,bool ForInscan)1171 void CodeGenFunction::EmitOMPReductionClauseInit(
1172 const OMPExecutableDirective &D,
1173 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1174 if (!HaveInsertPoint())
1175 return;
1176 SmallVector<const Expr *, 4> Shareds;
1177 SmallVector<const Expr *, 4> Privates;
1178 SmallVector<const Expr *, 4> ReductionOps;
1179 SmallVector<const Expr *, 4> LHSs;
1180 SmallVector<const Expr *, 4> RHSs;
1181 OMPTaskDataTy Data;
1182 SmallVector<const Expr *, 4> TaskLHSs;
1183 SmallVector<const Expr *, 4> TaskRHSs;
1184 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1185 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1186 continue;
1187 Shareds.append(C->varlist_begin(), C->varlist_end());
1188 Privates.append(C->privates().begin(), C->privates().end());
1189 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1190 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1191 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1192 if (C->getModifier() == OMPC_REDUCTION_task) {
1193 Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1194 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1195 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1196 Data.ReductionOps.append(C->reduction_ops().begin(),
1197 C->reduction_ops().end());
1198 TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1199 TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1200 }
1201 }
1202 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1203 unsigned Count = 0;
1204 auto *ILHS = LHSs.begin();
1205 auto *IRHS = RHSs.begin();
1206 auto *IPriv = Privates.begin();
1207 for (const Expr *IRef : Shareds) {
1208 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1209 // Emit private VarDecl with reduction init.
1210 RedCG.emitSharedOrigLValue(*this, Count);
1211 RedCG.emitAggregateType(*this, Count);
1212 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1213 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1214 RedCG.getSharedLValue(Count),
1215 [&Emission](CodeGenFunction &CGF) {
1216 CGF.EmitAutoVarInit(Emission);
1217 return true;
1218 });
1219 EmitAutoVarCleanups(Emission);
1220 Address BaseAddr = RedCG.adjustPrivateAddress(
1221 *this, Count, Emission.getAllocatedAddress());
1222 bool IsRegistered = PrivateScope.addPrivate(
1223 RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1224 assert(IsRegistered && "private var already registered as private");
1225 // Silence the warning about unused variable.
1226 (void)IsRegistered;
1227
1228 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1229 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1230 QualType Type = PrivateVD->getType();
1231 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1232 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1233 // Store the address of the original variable associated with the LHS
1234 // implicit variable.
1235 PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1236 return RedCG.getSharedLValue(Count).getAddress(*this);
1237 });
1238 PrivateScope.addPrivate(
1239 RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1240 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1241 isa<ArraySubscriptExpr>(IRef)) {
1242 // Store the address of the original variable associated with the LHS
1243 // implicit variable.
1244 PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1245 return RedCG.getSharedLValue(Count).getAddress(*this);
1246 });
1247 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1248 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1249 ConvertTypeForMem(RHSVD->getType()),
1250 "rhs.begin");
1251 });
1252 } else {
1253 QualType Type = PrivateVD->getType();
1254 bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1255 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1256 // Store the address of the original variable associated with the LHS
1257 // implicit variable.
1258 if (IsArray) {
1259 OriginalAddr = Builder.CreateElementBitCast(
1260 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1261 }
1262 PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1263 PrivateScope.addPrivate(
1264 RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1265 return IsArray
1266 ? Builder.CreateElementBitCast(
1267 GetAddrOfLocalVar(PrivateVD),
1268 ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1269 : GetAddrOfLocalVar(PrivateVD);
1270 });
1271 }
1272 ++ILHS;
1273 ++IRHS;
1274 ++IPriv;
1275 ++Count;
1276 }
1277 if (!Data.ReductionVars.empty()) {
1278 Data.IsReductionWithTaskMod = true;
1279 Data.IsWorksharingReduction =
1280 isOpenMPWorksharingDirective(D.getDirectiveKind());
1281 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1282 *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1283 const Expr *TaskRedRef = nullptr;
1284 switch (D.getDirectiveKind()) {
1285 case OMPD_parallel:
1286 TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1287 break;
1288 case OMPD_for:
1289 TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1290 break;
1291 case OMPD_sections:
1292 TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1293 break;
1294 case OMPD_parallel_for:
1295 TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1296 break;
1297 case OMPD_parallel_master:
1298 TaskRedRef =
1299 cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1300 break;
1301 case OMPD_parallel_sections:
1302 TaskRedRef =
1303 cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1304 break;
1305 case OMPD_target_parallel:
1306 TaskRedRef =
1307 cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1308 break;
1309 case OMPD_target_parallel_for:
1310 TaskRedRef =
1311 cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1312 break;
1313 case OMPD_distribute_parallel_for:
1314 TaskRedRef =
1315 cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1316 break;
1317 case OMPD_teams_distribute_parallel_for:
1318 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1319 .getTaskReductionRefExpr();
1320 break;
1321 case OMPD_target_teams_distribute_parallel_for:
1322 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1323 .getTaskReductionRefExpr();
1324 break;
1325 case OMPD_simd:
1326 case OMPD_for_simd:
1327 case OMPD_section:
1328 case OMPD_single:
1329 case OMPD_master:
1330 case OMPD_critical:
1331 case OMPD_parallel_for_simd:
1332 case OMPD_task:
1333 case OMPD_taskyield:
1334 case OMPD_barrier:
1335 case OMPD_taskwait:
1336 case OMPD_taskgroup:
1337 case OMPD_flush:
1338 case OMPD_depobj:
1339 case OMPD_scan:
1340 case OMPD_ordered:
1341 case OMPD_atomic:
1342 case OMPD_teams:
1343 case OMPD_target:
1344 case OMPD_cancellation_point:
1345 case OMPD_cancel:
1346 case OMPD_target_data:
1347 case OMPD_target_enter_data:
1348 case OMPD_target_exit_data:
1349 case OMPD_taskloop:
1350 case OMPD_taskloop_simd:
1351 case OMPD_master_taskloop:
1352 case OMPD_master_taskloop_simd:
1353 case OMPD_parallel_master_taskloop:
1354 case OMPD_parallel_master_taskloop_simd:
1355 case OMPD_distribute:
1356 case OMPD_target_update:
1357 case OMPD_distribute_parallel_for_simd:
1358 case OMPD_distribute_simd:
1359 case OMPD_target_parallel_for_simd:
1360 case OMPD_target_simd:
1361 case OMPD_teams_distribute:
1362 case OMPD_teams_distribute_simd:
1363 case OMPD_teams_distribute_parallel_for_simd:
1364 case OMPD_target_teams:
1365 case OMPD_target_teams_distribute:
1366 case OMPD_target_teams_distribute_parallel_for_simd:
1367 case OMPD_target_teams_distribute_simd:
1368 case OMPD_declare_target:
1369 case OMPD_end_declare_target:
1370 case OMPD_threadprivate:
1371 case OMPD_allocate:
1372 case OMPD_declare_reduction:
1373 case OMPD_declare_mapper:
1374 case OMPD_declare_simd:
1375 case OMPD_requires:
1376 case OMPD_declare_variant:
1377 case OMPD_begin_declare_variant:
1378 case OMPD_end_declare_variant:
1379 case OMPD_unknown:
1380 default:
1381 llvm_unreachable("Enexpected directive with task reductions.");
1382 }
1383
1384 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1385 EmitVarDecl(*VD);
1386 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1387 /*Volatile=*/false, TaskRedRef->getType());
1388 }
1389 }
1390
EmitOMPReductionClauseFinal(const OMPExecutableDirective & D,const OpenMPDirectiveKind ReductionKind)1391 void CodeGenFunction::EmitOMPReductionClauseFinal(
1392 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1393 if (!HaveInsertPoint())
1394 return;
1395 llvm::SmallVector<const Expr *, 8> Privates;
1396 llvm::SmallVector<const Expr *, 8> LHSExprs;
1397 llvm::SmallVector<const Expr *, 8> RHSExprs;
1398 llvm::SmallVector<const Expr *, 8> ReductionOps;
1399 bool HasAtLeastOneReduction = false;
1400 bool IsReductionWithTaskMod = false;
1401 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1402 // Do not emit for inscan reductions.
1403 if (C->getModifier() == OMPC_REDUCTION_inscan)
1404 continue;
1405 HasAtLeastOneReduction = true;
1406 Privates.append(C->privates().begin(), C->privates().end());
1407 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1408 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1409 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1410 IsReductionWithTaskMod =
1411 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1412 }
1413 if (HasAtLeastOneReduction) {
1414 if (IsReductionWithTaskMod) {
1415 CGM.getOpenMPRuntime().emitTaskReductionFini(
1416 *this, D.getBeginLoc(),
1417 isOpenMPWorksharingDirective(D.getDirectiveKind()));
1418 }
1419 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1420 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1421 ReductionKind == OMPD_simd;
1422 bool SimpleReduction = ReductionKind == OMPD_simd;
1423 // Emit nowait reduction if nowait clause is present or directive is a
1424 // parallel directive (it always has implicit barrier).
1425 CGM.getOpenMPRuntime().emitReduction(
1426 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1427 {WithNowait, SimpleReduction, ReductionKind});
1428 }
1429 }
1430
emitPostUpdateForReductionClause(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)1431 static void emitPostUpdateForReductionClause(
1432 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1433 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1434 if (!CGF.HaveInsertPoint())
1435 return;
1436 llvm::BasicBlock *DoneBB = nullptr;
1437 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1438 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1439 if (!DoneBB) {
1440 if (llvm::Value *Cond = CondGen(CGF)) {
1441 // If the first post-update expression is found, emit conditional
1442 // block if it was requested.
1443 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1444 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1445 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1446 CGF.EmitBlock(ThenBB);
1447 }
1448 }
1449 CGF.EmitIgnoredExpr(PostUpdate);
1450 }
1451 }
1452 if (DoneBB)
1453 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1454 }
1455
1456 namespace {
1457 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1458 /// parallel function. This is necessary for combined constructs such as
1459 /// 'distribute parallel for'
1460 typedef llvm::function_ref<void(CodeGenFunction &,
1461 const OMPExecutableDirective &,
1462 llvm::SmallVectorImpl<llvm::Value *> &)>
1463 CodeGenBoundParametersTy;
1464 } // anonymous namespace
1465
1466 static void
checkForLastprivateConditionalUpdate(CodeGenFunction & CGF,const OMPExecutableDirective & S)1467 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1468 const OMPExecutableDirective &S) {
1469 if (CGF.getLangOpts().OpenMP < 50)
1470 return;
1471 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1472 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1473 for (const Expr *Ref : C->varlists()) {
1474 if (!Ref->getType()->isScalarType())
1475 continue;
1476 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1477 if (!DRE)
1478 continue;
1479 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1480 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1481 }
1482 }
1483 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1484 for (const Expr *Ref : C->varlists()) {
1485 if (!Ref->getType()->isScalarType())
1486 continue;
1487 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1488 if (!DRE)
1489 continue;
1490 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1491 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1492 }
1493 }
1494 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1495 for (const Expr *Ref : C->varlists()) {
1496 if (!Ref->getType()->isScalarType())
1497 continue;
1498 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1499 if (!DRE)
1500 continue;
1501 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1502 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1503 }
1504 }
1505 // Privates should ne analyzed since they are not captured at all.
1506 // Task reductions may be skipped - tasks are ignored.
1507 // Firstprivates do not return value but may be passed by reference - no need
1508 // to check for updated lastprivate conditional.
1509 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1510 for (const Expr *Ref : C->varlists()) {
1511 if (!Ref->getType()->isScalarType())
1512 continue;
1513 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1514 if (!DRE)
1515 continue;
1516 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1517 }
1518 }
1519 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1520 CGF, S, PrivateDecls);
1521 }
1522
emitCommonOMPParallelDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen,const CodeGenBoundParametersTy & CodeGenBoundParameters)1523 static void emitCommonOMPParallelDirective(
1524 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1525 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1526 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1527 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1528 llvm::Function *OutlinedFn =
1529 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1530 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1531 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1532 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1533 llvm::Value *NumThreads =
1534 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1535 /*IgnoreResultAssign=*/true);
1536 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1537 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1538 }
1539 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1540 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1541 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1542 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1543 }
1544 const Expr *IfCond = nullptr;
1545 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1546 if (C->getNameModifier() == OMPD_unknown ||
1547 C->getNameModifier() == OMPD_parallel) {
1548 IfCond = C->getCondition();
1549 break;
1550 }
1551 }
1552
1553 OMPParallelScope Scope(CGF, S);
1554 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1555 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1556 // lower and upper bounds with the pragma 'for' chunking mechanism.
1557 // The following lambda takes care of appending the lower and upper bound
1558 // parameters when necessary
1559 CodeGenBoundParameters(CGF, S, CapturedVars);
1560 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1561 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1562 CapturedVars, IfCond);
1563 }
1564
emitEmptyBoundParameters(CodeGenFunction &,const OMPExecutableDirective &,llvm::SmallVectorImpl<llvm::Value * > &)1565 static void emitEmptyBoundParameters(CodeGenFunction &,
1566 const OMPExecutableDirective &,
1567 llvm::SmallVectorImpl<llvm::Value *> &) {}
1568
getAddressOfLocalVariable(CodeGenFunction & CGF,const VarDecl * VD)1569 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1570 CodeGenFunction &CGF, const VarDecl *VD) {
1571 CodeGenModule &CGM = CGF.CGM;
1572 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1573
1574 if (!VD)
1575 return Address::invalid();
1576 const VarDecl *CVD = VD->getCanonicalDecl();
1577 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1578 return Address::invalid();
1579 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1580 // Use the default allocation.
1581 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
1582 !AA->getAllocator())
1583 return Address::invalid();
1584 llvm::Value *Size;
1585 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1586 if (CVD->getType()->isVariablyModifiedType()) {
1587 Size = CGF.getTypeSize(CVD->getType());
1588 // Align the size: ((size + align - 1) / align) * align
1589 Size = CGF.Builder.CreateNUWAdd(
1590 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1591 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1592 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1593 } else {
1594 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1595 Size = CGM.getSize(Sz.alignTo(Align));
1596 }
1597
1598 assert(AA->getAllocator() &&
1599 "Expected allocator expression for non-default allocator.");
1600 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1601 // According to the standard, the original allocator type is a enum (integer).
1602 // Convert to pointer type, if required.
1603 if (Allocator->getType()->isIntegerTy())
1604 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1605 else if (Allocator->getType()->isPointerTy())
1606 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1607 CGM.VoidPtrTy);
1608
1609 llvm::Value *Addr = OMPBuilder.CreateOMPAlloc(
1610 CGF.Builder, Size, Allocator,
1611 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1612 llvm::CallInst *FreeCI =
1613 OMPBuilder.CreateOMPFree(CGF.Builder, Addr, Allocator);
1614
1615 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1616 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1617 Addr,
1618 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1619 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1620 return Address(Addr, Align);
1621 }
1622
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1623 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1624 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1625 SourceLocation Loc) {
1626 CodeGenModule &CGM = CGF.CGM;
1627 if (CGM.getLangOpts().OpenMPUseTLS &&
1628 CGM.getContext().getTargetInfo().isTLSSupported())
1629 return VDAddr;
1630
1631 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1632
1633 llvm::Type *VarTy = VDAddr.getElementType();
1634 llvm::Value *Data =
1635 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1636 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1637 std::string Suffix = getNameWithSeparators({"cache", ""});
1638 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1639
1640 llvm::CallInst *ThreadPrivateCacheCall =
1641 OMPBuilder.CreateCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1642
1643 return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1644 }
1645
getNameWithSeparators(ArrayRef<StringRef> Parts,StringRef FirstSeparator,StringRef Separator)1646 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1647 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1648 SmallString<128> Buffer;
1649 llvm::raw_svector_ostream OS(Buffer);
1650 StringRef Sep = FirstSeparator;
1651 for (StringRef Part : Parts) {
1652 OS << Sep << Part;
1653 Sep = Separator;
1654 }
1655 return OS.str().str();
1656 }
EmitOMPParallelDirective(const OMPParallelDirective & S)1657 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1658 if (CGM.getLangOpts().OpenMPIRBuilder) {
1659 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1660 // Check if we have any if clause associated with the directive.
1661 llvm::Value *IfCond = nullptr;
1662 if (const auto *C = S.getSingleClause<OMPIfClause>())
1663 IfCond = EmitScalarExpr(C->getCondition(),
1664 /*IgnoreResultAssign=*/true);
1665
1666 llvm::Value *NumThreads = nullptr;
1667 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1668 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1669 /*IgnoreResultAssign=*/true);
1670
1671 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1672 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1673 ProcBind = ProcBindClause->getProcBindKind();
1674
1675 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1676
1677 // The cleanup callback that finalizes all variabels at the given location,
1678 // thus calls destructors etc.
1679 auto FiniCB = [this](InsertPointTy IP) {
1680 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1681 };
1682
1683 // Privatization callback that performs appropriate action for
1684 // shared/private/firstprivate/lastprivate/copyin/... variables.
1685 //
1686 // TODO: This defaults to shared right now.
1687 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1688 llvm::Value &Val, llvm::Value *&ReplVal) {
1689 // The next line is appropriate only for variables (Val) with the
1690 // data-sharing attribute "shared".
1691 ReplVal = &Val;
1692
1693 return CodeGenIP;
1694 };
1695
1696 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1697 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1698
1699 auto BodyGenCB = [ParallelRegionBodyStmt,
1700 this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1701 llvm::BasicBlock &ContinuationBB) {
1702 OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1703 ContinuationBB);
1704 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1705 CodeGenIP, ContinuationBB);
1706 };
1707
1708 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1709 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1710 Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
1711 FiniCB, IfCond, NumThreads,
1712 ProcBind, S.hasCancel()));
1713 return;
1714 }
1715
1716 // Emit parallel region as a standalone region.
1717 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1718 Action.Enter(CGF);
1719 OMPPrivateScope PrivateScope(CGF);
1720 bool Copyins = CGF.EmitOMPCopyinClause(S);
1721 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1722 if (Copyins) {
1723 // Emit implicit barrier to synchronize threads and avoid data races on
1724 // propagation master's thread values of threadprivate variables to local
1725 // instances of that variables of all other implicit threads.
1726 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1727 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1728 /*ForceSimpleCall=*/true);
1729 }
1730 CGF.EmitOMPPrivateClause(S, PrivateScope);
1731 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1732 (void)PrivateScope.Privatize();
1733 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1734 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1735 };
1736 {
1737 auto LPCRegion =
1738 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1739 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1740 emitEmptyBoundParameters);
1741 emitPostUpdateForReductionClause(*this, S,
1742 [](CodeGenFunction &) { return nullptr; });
1743 }
1744 // Check for outer lastprivate conditional update.
1745 checkForLastprivateConditionalUpdate(*this, S);
1746 }
1747
emitBody(CodeGenFunction & CGF,const Stmt * S,const Stmt * NextLoop,int MaxLevel,int Level=0)1748 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1749 int MaxLevel, int Level = 0) {
1750 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1751 const Stmt *SimplifiedS = S->IgnoreContainers();
1752 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1753 PrettyStackTraceLoc CrashInfo(
1754 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1755 "LLVM IR generation of compound statement ('{}')");
1756
1757 // Keep track of the current cleanup stack depth, including debug scopes.
1758 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1759 for (const Stmt *CurStmt : CS->body())
1760 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1761 return;
1762 }
1763 if (SimplifiedS == NextLoop) {
1764 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1765 S = For->getBody();
1766 } else {
1767 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1768 "Expected canonical for loop or range-based for loop.");
1769 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1770 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1771 S = CXXFor->getBody();
1772 }
1773 if (Level + 1 < MaxLevel) {
1774 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1775 S, /*TryImperfectlyNestedLoops=*/true);
1776 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1777 return;
1778 }
1779 }
1780 CGF.EmitStmt(S);
1781 }
1782
EmitOMPLoopBody(const OMPLoopDirective & D,JumpDest LoopExit)1783 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1784 JumpDest LoopExit) {
1785 RunCleanupsScope BodyScope(*this);
1786 // Update counters values on current iteration.
1787 for (const Expr *UE : D.updates())
1788 EmitIgnoredExpr(UE);
1789 // Update the linear variables.
1790 // In distribute directives only loop counters may be marked as linear, no
1791 // need to generate the code for them.
1792 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1793 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1794 for (const Expr *UE : C->updates())
1795 EmitIgnoredExpr(UE);
1796 }
1797 }
1798
1799 // On a continue in the body, jump to the end.
1800 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1801 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1802 for (const Expr *E : D.finals_conditions()) {
1803 if (!E)
1804 continue;
1805 // Check that loop counter in non-rectangular nest fits into the iteration
1806 // space.
1807 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1808 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1809 getProfileCount(D.getBody()));
1810 EmitBlock(NextBB);
1811 }
1812
1813 OMPPrivateScope InscanScope(*this);
1814 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1815 bool IsInscanRegion = InscanScope.Privatize();
1816 if (IsInscanRegion) {
1817 // Need to remember the block before and after scan directive
1818 // to dispatch them correctly depending on the clause used in
1819 // this directive, inclusive or exclusive. For inclusive scan the natural
1820 // order of the blocks is used, for exclusive clause the blocks must be
1821 // executed in reverse order.
1822 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1823 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1824 // No need to allocate inscan exit block, in simd mode it is selected in the
1825 // codegen for the scan directive.
1826 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1827 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1828 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1829 EmitBranch(OMPScanDispatch);
1830 EmitBlock(OMPBeforeScanBlock);
1831 }
1832
1833 // Emit loop variables for C++ range loops.
1834 const Stmt *Body =
1835 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1836 // Emit loop body.
1837 emitBody(*this, Body,
1838 OMPLoopDirective::tryToFindNextInnerLoop(
1839 Body, /*TryImperfectlyNestedLoops=*/true),
1840 D.getCollapsedNumber());
1841
1842 // Jump to the dispatcher at the end of the loop body.
1843 if (IsInscanRegion)
1844 EmitBranch(OMPScanExitBlock);
1845
1846 // The end (updates/cleanups).
1847 EmitBlock(Continue.getBlock());
1848 BreakContinueStack.pop_back();
1849 }
1850
EmitOMPInnerLoop(const OMPExecutableDirective & S,bool RequiresCleanup,const Expr * LoopCond,const Expr * IncExpr,const llvm::function_ref<void (CodeGenFunction &)> BodyGen,const llvm::function_ref<void (CodeGenFunction &)> PostIncGen)1851 void CodeGenFunction::EmitOMPInnerLoop(
1852 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
1853 const Expr *IncExpr,
1854 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
1855 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
1856 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1857
1858 // Start the loop with a block that tests the condition.
1859 auto CondBlock = createBasicBlock("omp.inner.for.cond");
1860 EmitBlock(CondBlock);
1861 const SourceRange R = S.getSourceRange();
1862
1863 // If attributes are attached, push to the basic block with them.
1864 const auto &OMPED = cast<OMPExecutableDirective>(S);
1865 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
1866 const Stmt *SS = ICS->getCapturedStmt();
1867 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
1868 if (AS)
1869 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
1870 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
1871 SourceLocToDebugLoc(R.getEnd()));
1872 else
1873 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1874 SourceLocToDebugLoc(R.getEnd()));
1875
1876 // If there are any cleanups between here and the loop-exit scope,
1877 // create a block to stage a loop exit along.
1878 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
1879 if (RequiresCleanup)
1880 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1881
1882 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
1883
1884 // Emit condition.
1885 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1886 if (ExitBlock != LoopExit.getBlock()) {
1887 EmitBlock(ExitBlock);
1888 EmitBranchThroughCleanup(LoopExit);
1889 }
1890
1891 EmitBlock(LoopBody);
1892 incrementProfileCounter(&S);
1893
1894 // Create a block for the increment.
1895 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1896 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1897
1898 BodyGen(*this);
1899
1900 // Emit "IV = IV + 1" and a back-edge to the condition block.
1901 EmitBlock(Continue.getBlock());
1902 EmitIgnoredExpr(IncExpr);
1903 PostIncGen(*this);
1904 BreakContinueStack.pop_back();
1905 EmitBranch(CondBlock);
1906 LoopStack.pop();
1907 // Emit the fall-through block.
1908 EmitBlock(LoopExit.getBlock());
1909 }
1910
EmitOMPLinearClauseInit(const OMPLoopDirective & D)1911 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1912 if (!HaveInsertPoint())
1913 return false;
1914 // Emit inits for the linear variables.
1915 bool HasLinears = false;
1916 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1917 for (const Expr *Init : C->inits()) {
1918 HasLinears = true;
1919 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1920 if (const auto *Ref =
1921 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1922 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1923 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1924 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1925 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1926 VD->getInit()->getType(), VK_LValue,
1927 VD->getInit()->getExprLoc());
1928 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1929 VD->getType()),
1930 /*capturedByInit=*/false);
1931 EmitAutoVarCleanups(Emission);
1932 } else {
1933 EmitVarDecl(*VD);
1934 }
1935 }
1936 // Emit the linear steps for the linear clauses.
1937 // If a step is not constant, it is pre-calculated before the loop.
1938 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1939 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1940 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1941 // Emit calculation of the linear step.
1942 EmitIgnoredExpr(CS);
1943 }
1944 }
1945 return HasLinears;
1946 }
1947
EmitOMPLinearClauseFinal(const OMPLoopDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)1948 void CodeGenFunction::EmitOMPLinearClauseFinal(
1949 const OMPLoopDirective &D,
1950 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1951 if (!HaveInsertPoint())
1952 return;
1953 llvm::BasicBlock *DoneBB = nullptr;
1954 // Emit the final values of the linear variables.
1955 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1956 auto IC = C->varlist_begin();
1957 for (const Expr *F : C->finals()) {
1958 if (!DoneBB) {
1959 if (llvm::Value *Cond = CondGen(*this)) {
1960 // If the first post-update expression is found, emit conditional
1961 // block if it was requested.
1962 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
1963 DoneBB = createBasicBlock(".omp.linear.pu.done");
1964 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1965 EmitBlock(ThenBB);
1966 }
1967 }
1968 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1969 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1970 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1971 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1972 Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
1973 CodeGenFunction::OMPPrivateScope VarScope(*this);
1974 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
1975 (void)VarScope.Privatize();
1976 EmitIgnoredExpr(F);
1977 ++IC;
1978 }
1979 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1980 EmitIgnoredExpr(PostUpdate);
1981 }
1982 if (DoneBB)
1983 EmitBlock(DoneBB, /*IsFinished=*/true);
1984 }
1985
emitAlignedClause(CodeGenFunction & CGF,const OMPExecutableDirective & D)1986 static void emitAlignedClause(CodeGenFunction &CGF,
1987 const OMPExecutableDirective &D) {
1988 if (!CGF.HaveInsertPoint())
1989 return;
1990 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1991 llvm::APInt ClauseAlignment(64, 0);
1992 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
1993 auto *AlignmentCI =
1994 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1995 ClauseAlignment = AlignmentCI->getValue();
1996 }
1997 for (const Expr *E : Clause->varlists()) {
1998 llvm::APInt Alignment(ClauseAlignment);
1999 if (Alignment == 0) {
2000 // OpenMP [2.8.1, Description]
2001 // If no optional parameter is specified, implementation-defined default
2002 // alignments for SIMD instructions on the target platforms are assumed.
2003 Alignment =
2004 CGF.getContext()
2005 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2006 E->getType()->getPointeeType()))
2007 .getQuantity();
2008 }
2009 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2010 "alignment is not power of 2");
2011 if (Alignment != 0) {
2012 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2013 CGF.emitAlignmentAssumption(
2014 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2015 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2016 }
2017 }
2018 }
2019 }
2020
EmitOMPPrivateLoopCounters(const OMPLoopDirective & S,CodeGenFunction::OMPPrivateScope & LoopScope)2021 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2022 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2023 if (!HaveInsertPoint())
2024 return;
2025 auto I = S.private_counters().begin();
2026 for (const Expr *E : S.counters()) {
2027 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2028 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2029 // Emit var without initialization.
2030 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2031 EmitAutoVarCleanups(VarEmission);
2032 LocalDeclMap.erase(PrivateVD);
2033 (void)LoopScope.addPrivate(VD, [&VarEmission]() {
2034 return VarEmission.getAllocatedAddress();
2035 });
2036 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2037 VD->hasGlobalStorage()) {
2038 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2039 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2040 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2041 E->getType(), VK_LValue, E->getExprLoc());
2042 return EmitLValue(&DRE).getAddress(*this);
2043 });
2044 } else {
2045 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2046 return VarEmission.getAllocatedAddress();
2047 });
2048 }
2049 ++I;
2050 }
2051 // Privatize extra loop counters used in loops for ordered(n) clauses.
2052 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2053 if (!C->getNumForLoops())
2054 continue;
2055 for (unsigned I = S.getCollapsedNumber(),
2056 E = C->getLoopNumIterations().size();
2057 I < E; ++I) {
2058 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2059 const auto *VD = cast<VarDecl>(DRE->getDecl());
2060 // Override only those variables that can be captured to avoid re-emission
2061 // of the variables declared within the loops.
2062 if (DRE->refersToEnclosingVariableOrCapture()) {
2063 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2064 return CreateMemTemp(DRE->getType(), VD->getName());
2065 });
2066 }
2067 }
2068 }
2069 }
2070
emitPreCond(CodeGenFunction & CGF,const OMPLoopDirective & S,const Expr * Cond,llvm::BasicBlock * TrueBlock,llvm::BasicBlock * FalseBlock,uint64_t TrueCount)2071 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2072 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2073 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2074 if (!CGF.HaveInsertPoint())
2075 return;
2076 {
2077 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2078 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2079 (void)PreCondScope.Privatize();
2080 // Get initial values of real counters.
2081 for (const Expr *I : S.inits()) {
2082 CGF.EmitIgnoredExpr(I);
2083 }
2084 }
2085 // Create temp loop control variables with their init values to support
2086 // non-rectangular loops.
2087 CodeGenFunction::OMPMapVars PreCondVars;
2088 for (const Expr * E: S.dependent_counters()) {
2089 if (!E)
2090 continue;
2091 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2092 "dependent counter must not be an iterator.");
2093 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2094 Address CounterAddr =
2095 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2096 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2097 }
2098 (void)PreCondVars.apply(CGF);
2099 for (const Expr *E : S.dependent_inits()) {
2100 if (!E)
2101 continue;
2102 CGF.EmitIgnoredExpr(E);
2103 }
2104 // Check that loop is executed at least one time.
2105 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2106 PreCondVars.restore(CGF);
2107 }
2108
EmitOMPLinearClause(const OMPLoopDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope)2109 void CodeGenFunction::EmitOMPLinearClause(
2110 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2111 if (!HaveInsertPoint())
2112 return;
2113 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2114 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2115 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2116 for (const Expr *C : LoopDirective->counters()) {
2117 SIMDLCVs.insert(
2118 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2119 }
2120 }
2121 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2122 auto CurPrivate = C->privates().begin();
2123 for (const Expr *E : C->varlists()) {
2124 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2125 const auto *PrivateVD =
2126 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2127 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2128 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2129 // Emit private VarDecl with copy init.
2130 EmitVarDecl(*PrivateVD);
2131 return GetAddrOfLocalVar(PrivateVD);
2132 });
2133 assert(IsRegistered && "linear var already registered as private");
2134 // Silence the warning about unused variable.
2135 (void)IsRegistered;
2136 } else {
2137 EmitVarDecl(*PrivateVD);
2138 }
2139 ++CurPrivate;
2140 }
2141 }
2142 }
2143
emitSimdlenSafelenClause(CodeGenFunction & CGF,const OMPExecutableDirective & D,bool IsMonotonic)2144 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2145 const OMPExecutableDirective &D,
2146 bool IsMonotonic) {
2147 if (!CGF.HaveInsertPoint())
2148 return;
2149 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2150 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2151 /*ignoreResult=*/true);
2152 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2153 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2154 // In presence of finite 'safelen', it may be unsafe to mark all
2155 // the memory instructions parallel, because loop-carried
2156 // dependences of 'safelen' iterations are possible.
2157 if (!IsMonotonic)
2158 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2159 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2160 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2161 /*ignoreResult=*/true);
2162 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2163 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2164 // In presence of finite 'safelen', it may be unsafe to mark all
2165 // the memory instructions parallel, because loop-carried
2166 // dependences of 'safelen' iterations are possible.
2167 CGF.LoopStack.setParallel(/*Enable=*/false);
2168 }
2169 }
2170
EmitOMPSimdInit(const OMPLoopDirective & D,bool IsMonotonic)2171 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2172 bool IsMonotonic) {
2173 // Walk clauses and process safelen/lastprivate.
2174 LoopStack.setParallel(!IsMonotonic);
2175 LoopStack.setVectorizeEnable();
2176 emitSimdlenSafelenClause(*this, D, IsMonotonic);
2177 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2178 if (C->getKind() == OMPC_ORDER_concurrent)
2179 LoopStack.setParallel(/*Enable=*/true);
2180 if ((D.getDirectiveKind() == OMPD_simd ||
2181 (getLangOpts().OpenMPSimd &&
2182 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2183 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2184 [](const OMPReductionClause *C) {
2185 return C->getModifier() == OMPC_REDUCTION_inscan;
2186 }))
2187 // Disable parallel access in case of prefix sum.
2188 LoopStack.setParallel(/*Enable=*/false);
2189 }
2190
EmitOMPSimdFinal(const OMPLoopDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)2191 void CodeGenFunction::EmitOMPSimdFinal(
2192 const OMPLoopDirective &D,
2193 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2194 if (!HaveInsertPoint())
2195 return;
2196 llvm::BasicBlock *DoneBB = nullptr;
2197 auto IC = D.counters().begin();
2198 auto IPC = D.private_counters().begin();
2199 for (const Expr *F : D.finals()) {
2200 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2201 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2202 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2203 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2204 OrigVD->hasGlobalStorage() || CED) {
2205 if (!DoneBB) {
2206 if (llvm::Value *Cond = CondGen(*this)) {
2207 // If the first post-update expression is found, emit conditional
2208 // block if it was requested.
2209 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2210 DoneBB = createBasicBlock(".omp.final.done");
2211 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2212 EmitBlock(ThenBB);
2213 }
2214 }
2215 Address OrigAddr = Address::invalid();
2216 if (CED) {
2217 OrigAddr =
2218 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2219 } else {
2220 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2221 /*RefersToEnclosingVariableOrCapture=*/false,
2222 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2223 OrigAddr = EmitLValue(&DRE).getAddress(*this);
2224 }
2225 OMPPrivateScope VarScope(*this);
2226 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2227 (void)VarScope.Privatize();
2228 EmitIgnoredExpr(F);
2229 }
2230 ++IC;
2231 ++IPC;
2232 }
2233 if (DoneBB)
2234 EmitBlock(DoneBB, /*IsFinished=*/true);
2235 }
2236
emitOMPLoopBodyWithStopPoint(CodeGenFunction & CGF,const OMPLoopDirective & S,CodeGenFunction::JumpDest LoopExit)2237 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2238 const OMPLoopDirective &S,
2239 CodeGenFunction::JumpDest LoopExit) {
2240 CGF.EmitOMPLoopBody(S, LoopExit);
2241 CGF.EmitStopPoint(&S);
2242 }
2243
2244 /// Emit a helper variable and return corresponding lvalue.
EmitOMPHelperVar(CodeGenFunction & CGF,const DeclRefExpr * Helper)2245 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2246 const DeclRefExpr *Helper) {
2247 auto VDecl = cast<VarDecl>(Helper->getDecl());
2248 CGF.EmitVarDecl(*VDecl);
2249 return CGF.EmitLValue(Helper);
2250 }
2251
emitCommonSimdLoop(CodeGenFunction & CGF,const OMPLoopDirective & S,const RegionCodeGenTy & SimdInitGen,const RegionCodeGenTy & BodyCodeGen)2252 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2253 const RegionCodeGenTy &SimdInitGen,
2254 const RegionCodeGenTy &BodyCodeGen) {
2255 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2256 PrePostActionTy &) {
2257 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2258 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2259 SimdInitGen(CGF);
2260
2261 BodyCodeGen(CGF);
2262 };
2263 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2264 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2265 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2266
2267 BodyCodeGen(CGF);
2268 };
2269 const Expr *IfCond = nullptr;
2270 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2271 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2272 if (CGF.getLangOpts().OpenMP >= 50 &&
2273 (C->getNameModifier() == OMPD_unknown ||
2274 C->getNameModifier() == OMPD_simd)) {
2275 IfCond = C->getCondition();
2276 break;
2277 }
2278 }
2279 }
2280 if (IfCond) {
2281 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2282 } else {
2283 RegionCodeGenTy ThenRCG(ThenGen);
2284 ThenRCG(CGF);
2285 }
2286 }
2287
emitOMPSimdRegion(CodeGenFunction & CGF,const OMPLoopDirective & S,PrePostActionTy & Action)2288 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2289 PrePostActionTy &Action) {
2290 Action.Enter(CGF);
2291 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2292 "Expected simd directive");
2293 OMPLoopScope PreInitScope(CGF, S);
2294 // if (PreCond) {
2295 // for (IV in 0..LastIteration) BODY;
2296 // <Final counter/linear vars updates>;
2297 // }
2298 //
2299 if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2300 isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2301 isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2302 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2303 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2304 }
2305
2306 // Emit: if (PreCond) - begin.
2307 // If the condition constant folds and can be elided, avoid emitting the
2308 // whole loop.
2309 bool CondConstant;
2310 llvm::BasicBlock *ContBlock = nullptr;
2311 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2312 if (!CondConstant)
2313 return;
2314 } else {
2315 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2316 ContBlock = CGF.createBasicBlock("simd.if.end");
2317 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2318 CGF.getProfileCount(&S));
2319 CGF.EmitBlock(ThenBlock);
2320 CGF.incrementProfileCounter(&S);
2321 }
2322
2323 // Emit the loop iteration variable.
2324 const Expr *IVExpr = S.getIterationVariable();
2325 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2326 CGF.EmitVarDecl(*IVDecl);
2327 CGF.EmitIgnoredExpr(S.getInit());
2328
2329 // Emit the iterations count variable.
2330 // If it is not a variable, Sema decided to calculate iterations count on
2331 // each iteration (e.g., it is foldable into a constant).
2332 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2333 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2334 // Emit calculation of the iterations count.
2335 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2336 }
2337
2338 emitAlignedClause(CGF, S);
2339 (void)CGF.EmitOMPLinearClauseInit(S);
2340 {
2341 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2342 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2343 CGF.EmitOMPLinearClause(S, LoopScope);
2344 CGF.EmitOMPPrivateClause(S, LoopScope);
2345 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2346 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2347 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2348 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2349 (void)LoopScope.Privatize();
2350 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2351 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2352
2353 emitCommonSimdLoop(
2354 CGF, S,
2355 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2356 CGF.EmitOMPSimdInit(S);
2357 },
2358 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2359 CGF.EmitOMPInnerLoop(
2360 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2361 [&S](CodeGenFunction &CGF) {
2362 emitOMPLoopBodyWithStopPoint(CGF, S,
2363 CodeGenFunction::JumpDest());
2364 },
2365 [](CodeGenFunction &) {});
2366 });
2367 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2368 // Emit final copy of the lastprivate variables at the end of loops.
2369 if (HasLastprivateClause)
2370 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2371 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2372 emitPostUpdateForReductionClause(CGF, S,
2373 [](CodeGenFunction &) { return nullptr; });
2374 }
2375 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2376 // Emit: if (PreCond) - end.
2377 if (ContBlock) {
2378 CGF.EmitBranch(ContBlock);
2379 CGF.EmitBlock(ContBlock, true);
2380 }
2381 }
2382
EmitOMPSimdDirective(const OMPSimdDirective & S)2383 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2384 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2385 OMPFirstScanLoop = true;
2386 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2387 emitOMPSimdRegion(CGF, S, Action);
2388 };
2389 {
2390 auto LPCRegion =
2391 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2392 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2393 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2394 }
2395 // Check for outer lastprivate conditional update.
2396 checkForLastprivateConditionalUpdate(*this, S);
2397 }
2398
EmitOMPOuterLoop(bool DynamicOrOrdered,bool IsMonotonic,const OMPLoopDirective & S,CodeGenFunction::OMPPrivateScope & LoopScope,const CodeGenFunction::OMPLoopArguments & LoopArgs,const CodeGenFunction::CodeGenLoopTy & CodeGenLoop,const CodeGenFunction::CodeGenOrderedTy & CodeGenOrdered)2399 void CodeGenFunction::EmitOMPOuterLoop(
2400 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2401 CodeGenFunction::OMPPrivateScope &LoopScope,
2402 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2403 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2404 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2405 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2406
2407 const Expr *IVExpr = S.getIterationVariable();
2408 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2409 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2410
2411 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2412
2413 // Start the loop with a block that tests the condition.
2414 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2415 EmitBlock(CondBlock);
2416 const SourceRange R = S.getSourceRange();
2417 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2418 SourceLocToDebugLoc(R.getEnd()));
2419
2420 llvm::Value *BoolCondVal = nullptr;
2421 if (!DynamicOrOrdered) {
2422 // UB = min(UB, GlobalUB) or
2423 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2424 // 'distribute parallel for')
2425 EmitIgnoredExpr(LoopArgs.EUB);
2426 // IV = LB
2427 EmitIgnoredExpr(LoopArgs.Init);
2428 // IV < UB
2429 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2430 } else {
2431 BoolCondVal =
2432 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2433 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2434 }
2435
2436 // If there are any cleanups between here and the loop-exit scope,
2437 // create a block to stage a loop exit along.
2438 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2439 if (LoopScope.requiresCleanups())
2440 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2441
2442 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2443 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2444 if (ExitBlock != LoopExit.getBlock()) {
2445 EmitBlock(ExitBlock);
2446 EmitBranchThroughCleanup(LoopExit);
2447 }
2448 EmitBlock(LoopBody);
2449
2450 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2451 // LB for loop condition and emitted it above).
2452 if (DynamicOrOrdered)
2453 EmitIgnoredExpr(LoopArgs.Init);
2454
2455 // Create a block for the increment.
2456 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2457 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2458
2459 emitCommonSimdLoop(
2460 *this, S,
2461 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2462 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2463 // with dynamic/guided scheduling and without ordered clause.
2464 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2465 CGF.LoopStack.setParallel(!IsMonotonic);
2466 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2467 if (C->getKind() == OMPC_ORDER_concurrent)
2468 CGF.LoopStack.setParallel(/*Enable=*/true);
2469 } else {
2470 CGF.EmitOMPSimdInit(S, IsMonotonic);
2471 }
2472 },
2473 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2474 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2475 SourceLocation Loc = S.getBeginLoc();
2476 // when 'distribute' is not combined with a 'for':
2477 // while (idx <= UB) { BODY; ++idx; }
2478 // when 'distribute' is combined with a 'for'
2479 // (e.g. 'distribute parallel for')
2480 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2481 CGF.EmitOMPInnerLoop(
2482 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2483 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2484 CodeGenLoop(CGF, S, LoopExit);
2485 },
2486 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2487 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2488 });
2489 });
2490
2491 EmitBlock(Continue.getBlock());
2492 BreakContinueStack.pop_back();
2493 if (!DynamicOrOrdered) {
2494 // Emit "LB = LB + Stride", "UB = UB + Stride".
2495 EmitIgnoredExpr(LoopArgs.NextLB);
2496 EmitIgnoredExpr(LoopArgs.NextUB);
2497 }
2498
2499 EmitBranch(CondBlock);
2500 LoopStack.pop();
2501 // Emit the fall-through block.
2502 EmitBlock(LoopExit.getBlock());
2503
2504 // Tell the runtime we are done.
2505 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2506 if (!DynamicOrOrdered)
2507 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2508 S.getDirectiveKind());
2509 };
2510 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2511 }
2512
EmitOMPForOuterLoop(const OpenMPScheduleTy & ScheduleKind,bool IsMonotonic,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,bool Ordered,const OMPLoopArguments & LoopArgs,const CodeGenDispatchBoundsTy & CGDispatchBounds)2513 void CodeGenFunction::EmitOMPForOuterLoop(
2514 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2515 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2516 const OMPLoopArguments &LoopArgs,
2517 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2518 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2519
2520 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2521 const bool DynamicOrOrdered =
2522 Ordered || RT.isDynamic(ScheduleKind.Schedule);
2523
2524 assert((Ordered ||
2525 !RT.isStaticNonchunked(ScheduleKind.Schedule,
2526 LoopArgs.Chunk != nullptr)) &&
2527 "static non-chunked schedule does not need outer loop");
2528
2529 // Emit outer loop.
2530 //
2531 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2532 // When schedule(dynamic,chunk_size) is specified, the iterations are
2533 // distributed to threads in the team in chunks as the threads request them.
2534 // Each thread executes a chunk of iterations, then requests another chunk,
2535 // until no chunks remain to be distributed. Each chunk contains chunk_size
2536 // iterations, except for the last chunk to be distributed, which may have
2537 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2538 //
2539 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2540 // to threads in the team in chunks as the executing threads request them.
2541 // Each thread executes a chunk of iterations, then requests another chunk,
2542 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2543 // each chunk is proportional to the number of unassigned iterations divided
2544 // by the number of threads in the team, decreasing to 1. For a chunk_size
2545 // with value k (greater than 1), the size of each chunk is determined in the
2546 // same way, with the restriction that the chunks do not contain fewer than k
2547 // iterations (except for the last chunk to be assigned, which may have fewer
2548 // than k iterations).
2549 //
2550 // When schedule(auto) is specified, the decision regarding scheduling is
2551 // delegated to the compiler and/or runtime system. The programmer gives the
2552 // implementation the freedom to choose any possible mapping of iterations to
2553 // threads in the team.
2554 //
2555 // When schedule(runtime) is specified, the decision regarding scheduling is
2556 // deferred until run time, and the schedule and chunk size are taken from the
2557 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2558 // implementation defined
2559 //
2560 // while(__kmpc_dispatch_next(&LB, &UB)) {
2561 // idx = LB;
2562 // while (idx <= UB) { BODY; ++idx;
2563 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2564 // } // inner loop
2565 // }
2566 //
2567 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2568 // When schedule(static, chunk_size) is specified, iterations are divided into
2569 // chunks of size chunk_size, and the chunks are assigned to the threads in
2570 // the team in a round-robin fashion in the order of the thread number.
2571 //
2572 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2573 // while (idx <= UB) { BODY; ++idx; } // inner loop
2574 // LB = LB + ST;
2575 // UB = UB + ST;
2576 // }
2577 //
2578
2579 const Expr *IVExpr = S.getIterationVariable();
2580 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2581 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2582
2583 if (DynamicOrOrdered) {
2584 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2585 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2586 llvm::Value *LBVal = DispatchBounds.first;
2587 llvm::Value *UBVal = DispatchBounds.second;
2588 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2589 LoopArgs.Chunk};
2590 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2591 IVSigned, Ordered, DipatchRTInputValues);
2592 } else {
2593 CGOpenMPRuntime::StaticRTInput StaticInit(
2594 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2595 LoopArgs.ST, LoopArgs.Chunk);
2596 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2597 ScheduleKind, StaticInit);
2598 }
2599
2600 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2601 const unsigned IVSize,
2602 const bool IVSigned) {
2603 if (Ordered) {
2604 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2605 IVSigned);
2606 }
2607 };
2608
2609 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2610 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2611 OuterLoopArgs.IncExpr = S.getInc();
2612 OuterLoopArgs.Init = S.getInit();
2613 OuterLoopArgs.Cond = S.getCond();
2614 OuterLoopArgs.NextLB = S.getNextLowerBound();
2615 OuterLoopArgs.NextUB = S.getNextUpperBound();
2616 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2617 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2618 }
2619
emitEmptyOrdered(CodeGenFunction &,SourceLocation Loc,const unsigned IVSize,const bool IVSigned)2620 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2621 const unsigned IVSize, const bool IVSigned) {}
2622
EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,const OMPLoopArguments & LoopArgs,const CodeGenLoopTy & CodeGenLoopContent)2623 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2624 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2625 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2626 const CodeGenLoopTy &CodeGenLoopContent) {
2627
2628 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2629
2630 // Emit outer loop.
2631 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2632 // dynamic
2633 //
2634
2635 const Expr *IVExpr = S.getIterationVariable();
2636 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2637 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2638
2639 CGOpenMPRuntime::StaticRTInput StaticInit(
2640 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2641 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2642 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2643
2644 // for combined 'distribute' and 'for' the increment expression of distribute
2645 // is stored in DistInc. For 'distribute' alone, it is in Inc.
2646 Expr *IncExpr;
2647 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2648 IncExpr = S.getDistInc();
2649 else
2650 IncExpr = S.getInc();
2651
2652 // this routine is shared by 'omp distribute parallel for' and
2653 // 'omp distribute': select the right EUB expression depending on the
2654 // directive
2655 OMPLoopArguments OuterLoopArgs;
2656 OuterLoopArgs.LB = LoopArgs.LB;
2657 OuterLoopArgs.UB = LoopArgs.UB;
2658 OuterLoopArgs.ST = LoopArgs.ST;
2659 OuterLoopArgs.IL = LoopArgs.IL;
2660 OuterLoopArgs.Chunk = LoopArgs.Chunk;
2661 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2662 ? S.getCombinedEnsureUpperBound()
2663 : S.getEnsureUpperBound();
2664 OuterLoopArgs.IncExpr = IncExpr;
2665 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2666 ? S.getCombinedInit()
2667 : S.getInit();
2668 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2669 ? S.getCombinedCond()
2670 : S.getCond();
2671 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2672 ? S.getCombinedNextLowerBound()
2673 : S.getNextLowerBound();
2674 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2675 ? S.getCombinedNextUpperBound()
2676 : S.getNextUpperBound();
2677
2678 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2679 LoopScope, OuterLoopArgs, CodeGenLoopContent,
2680 emitEmptyOrdered);
2681 }
2682
2683 static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)2684 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2685 const OMPExecutableDirective &S) {
2686 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2687 LValue LB =
2688 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2689 LValue UB =
2690 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2691
2692 // When composing 'distribute' with 'for' (e.g. as in 'distribute
2693 // parallel for') we need to use the 'distribute'
2694 // chunk lower and upper bounds rather than the whole loop iteration
2695 // space. These are parameters to the outlined function for 'parallel'
2696 // and we copy the bounds of the previous schedule into the
2697 // the current ones.
2698 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2699 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2700 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2701 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2702 PrevLBVal = CGF.EmitScalarConversion(
2703 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2704 LS.getIterationVariable()->getType(),
2705 LS.getPrevLowerBoundVariable()->getExprLoc());
2706 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2707 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2708 PrevUBVal = CGF.EmitScalarConversion(
2709 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2710 LS.getIterationVariable()->getType(),
2711 LS.getPrevUpperBoundVariable()->getExprLoc());
2712
2713 CGF.EmitStoreOfScalar(PrevLBVal, LB);
2714 CGF.EmitStoreOfScalar(PrevUBVal, UB);
2715
2716 return {LB, UB};
2717 }
2718
2719 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2720 /// we need to use the LB and UB expressions generated by the worksharing
2721 /// code generation support, whereas in non combined situations we would
2722 /// just emit 0 and the LastIteration expression
2723 /// This function is necessary due to the difference of the LB and UB
2724 /// types for the RT emission routines for 'for_static_init' and
2725 /// 'for_dispatch_init'
2726 static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)2727 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2728 const OMPExecutableDirective &S,
2729 Address LB, Address UB) {
2730 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2731 const Expr *IVExpr = LS.getIterationVariable();
2732 // when implementing a dynamic schedule for a 'for' combined with a
2733 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2734 // is not normalized as each team only executes its own assigned
2735 // distribute chunk
2736 QualType IteratorTy = IVExpr->getType();
2737 llvm::Value *LBVal =
2738 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2739 llvm::Value *UBVal =
2740 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2741 return {LBVal, UBVal};
2742 }
2743
emitDistributeParallelForDistributeInnerBoundParams(CodeGenFunction & CGF,const OMPExecutableDirective & S,llvm::SmallVectorImpl<llvm::Value * > & CapturedVars)2744 static void emitDistributeParallelForDistributeInnerBoundParams(
2745 CodeGenFunction &CGF, const OMPExecutableDirective &S,
2746 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2747 const auto &Dir = cast<OMPLoopDirective>(S);
2748 LValue LB =
2749 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2750 llvm::Value *LBCast =
2751 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2752 CGF.SizeTy, /*isSigned=*/false);
2753 CapturedVars.push_back(LBCast);
2754 LValue UB =
2755 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2756
2757 llvm::Value *UBCast =
2758 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2759 CGF.SizeTy, /*isSigned=*/false);
2760 CapturedVars.push_back(UBCast);
2761 }
2762
2763 static void
emitInnerParallelForWhenCombined(CodeGenFunction & CGF,const OMPLoopDirective & S,CodeGenFunction::JumpDest LoopExit)2764 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2765 const OMPLoopDirective &S,
2766 CodeGenFunction::JumpDest LoopExit) {
2767 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2768 PrePostActionTy &Action) {
2769 Action.Enter(CGF);
2770 bool HasCancel = false;
2771 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2772 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2773 HasCancel = D->hasCancel();
2774 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2775 HasCancel = D->hasCancel();
2776 else if (const auto *D =
2777 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2778 HasCancel = D->hasCancel();
2779 }
2780 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2781 HasCancel);
2782 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2783 emitDistributeParallelForInnerBounds,
2784 emitDistributeParallelForDispatchBounds);
2785 };
2786
2787 emitCommonOMPParallelDirective(
2788 CGF, S,
2789 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2790 CGInlinedWorksharingLoop,
2791 emitDistributeParallelForDistributeInnerBoundParams);
2792 }
2793
EmitOMPDistributeParallelForDirective(const OMPDistributeParallelForDirective & S)2794 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2795 const OMPDistributeParallelForDirective &S) {
2796 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2797 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2798 S.getDistInc());
2799 };
2800 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2801 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2802 }
2803
EmitOMPDistributeParallelForSimdDirective(const OMPDistributeParallelForSimdDirective & S)2804 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2805 const OMPDistributeParallelForSimdDirective &S) {
2806 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2807 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2808 S.getDistInc());
2809 };
2810 OMPLexicalScope Scope(*this, S, OMPD_parallel);
2811 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2812 }
2813
EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective & S)2814 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2815 const OMPDistributeSimdDirective &S) {
2816 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2817 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2818 };
2819 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2820 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2821 }
2822
EmitOMPTargetSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetSimdDirective & S)2823 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2824 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2825 // Emit SPMD target parallel for region as a standalone region.
2826 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2827 emitOMPSimdRegion(CGF, S, Action);
2828 };
2829 llvm::Function *Fn;
2830 llvm::Constant *Addr;
2831 // Emit target region as a standalone region.
2832 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2833 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2834 assert(Fn && Addr && "Target device function emission failed.");
2835 }
2836
EmitOMPTargetSimdDirective(const OMPTargetSimdDirective & S)2837 void CodeGenFunction::EmitOMPTargetSimdDirective(
2838 const OMPTargetSimdDirective &S) {
2839 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2840 emitOMPSimdRegion(CGF, S, Action);
2841 };
2842 emitCommonOMPTargetDirective(*this, S, CodeGen);
2843 }
2844
2845 namespace {
2846 struct ScheduleKindModifiersTy {
2847 OpenMPScheduleClauseKind Kind;
2848 OpenMPScheduleClauseModifier M1;
2849 OpenMPScheduleClauseModifier M2;
ScheduleKindModifiersTy__anoncd1c19713e11::ScheduleKindModifiersTy2850 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2851 OpenMPScheduleClauseModifier M1,
2852 OpenMPScheduleClauseModifier M2)
2853 : Kind(Kind), M1(M1), M2(M2) {}
2854 };
2855 } // namespace
2856
EmitOMPWorksharingLoop(const OMPLoopDirective & S,Expr * EUB,const CodeGenLoopBoundsTy & CodeGenLoopBounds,const CodeGenDispatchBoundsTy & CGDispatchBounds)2857 bool CodeGenFunction::EmitOMPWorksharingLoop(
2858 const OMPLoopDirective &S, Expr *EUB,
2859 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2860 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2861 // Emit the loop iteration variable.
2862 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2863 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
2864 EmitVarDecl(*IVDecl);
2865
2866 // Emit the iterations count variable.
2867 // If it is not a variable, Sema decided to calculate iterations count on each
2868 // iteration (e.g., it is foldable into a constant).
2869 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2870 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2871 // Emit calculation of the iterations count.
2872 EmitIgnoredExpr(S.getCalcLastIteration());
2873 }
2874
2875 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2876
2877 bool HasLastprivateClause;
2878 // Check pre-condition.
2879 {
2880 OMPLoopScope PreInitScope(*this, S);
2881 // Skip the entire loop if we don't meet the precondition.
2882 // If the condition constant folds and can be elided, avoid emitting the
2883 // whole loop.
2884 bool CondConstant;
2885 llvm::BasicBlock *ContBlock = nullptr;
2886 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2887 if (!CondConstant)
2888 return false;
2889 } else {
2890 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
2891 ContBlock = createBasicBlock("omp.precond.end");
2892 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2893 getProfileCount(&S));
2894 EmitBlock(ThenBlock);
2895 incrementProfileCounter(&S);
2896 }
2897
2898 RunCleanupsScope DoacrossCleanupScope(*this);
2899 bool Ordered = false;
2900 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2901 if (OrderedClause->getNumForLoops())
2902 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
2903 else
2904 Ordered = true;
2905 }
2906
2907 llvm::DenseSet<const Expr *> EmittedFinals;
2908 emitAlignedClause(*this, S);
2909 bool HasLinears = EmitOMPLinearClauseInit(S);
2910 // Emit helper vars inits.
2911
2912 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2913 LValue LB = Bounds.first;
2914 LValue UB = Bounds.second;
2915 LValue ST =
2916 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2917 LValue IL =
2918 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2919
2920 // Emit 'then' code.
2921 {
2922 OMPPrivateScope LoopScope(*this);
2923 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2924 // Emit implicit barrier to synchronize threads and avoid data races on
2925 // initialization of firstprivate variables and post-update of
2926 // lastprivate variables.
2927 CGM.getOpenMPRuntime().emitBarrierCall(
2928 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
2929 /*ForceSimpleCall=*/true);
2930 }
2931 EmitOMPPrivateClause(S, LoopScope);
2932 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2933 *this, S, EmitLValue(S.getIterationVariable()));
2934 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2935 EmitOMPReductionClauseInit(S, LoopScope);
2936 EmitOMPPrivateLoopCounters(S, LoopScope);
2937 EmitOMPLinearClause(S, LoopScope);
2938 (void)LoopScope.Privatize();
2939 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2940 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
2941
2942 // Detect the loop schedule kind and chunk.
2943 const Expr *ChunkExpr = nullptr;
2944 OpenMPScheduleTy ScheduleKind;
2945 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
2946 ScheduleKind.Schedule = C->getScheduleKind();
2947 ScheduleKind.M1 = C->getFirstScheduleModifier();
2948 ScheduleKind.M2 = C->getSecondScheduleModifier();
2949 ChunkExpr = C->getChunkSize();
2950 } else {
2951 // Default behaviour for schedule clause.
2952 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
2953 *this, S, ScheduleKind.Schedule, ChunkExpr);
2954 }
2955 bool HasChunkSizeOne = false;
2956 llvm::Value *Chunk = nullptr;
2957 if (ChunkExpr) {
2958 Chunk = EmitScalarExpr(ChunkExpr);
2959 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
2960 S.getIterationVariable()->getType(),
2961 S.getBeginLoc());
2962 Expr::EvalResult Result;
2963 if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
2964 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
2965 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
2966 }
2967 }
2968 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2969 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2970 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2971 // If the static schedule kind is specified or if the ordered clause is
2972 // specified, and if no monotonic modifier is specified, the effect will
2973 // be as if the monotonic modifier was specified.
2974 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
2975 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
2976 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
2977 bool IsMonotonic =
2978 Ordered ||
2979 ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2980 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
2981 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
2982 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
2983 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2984 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2985 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
2986 /* Chunked */ Chunk != nullptr) ||
2987 StaticChunkedOne) &&
2988 !Ordered) {
2989 JumpDest LoopExit =
2990 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2991 emitCommonSimdLoop(
2992 *this, S,
2993 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2994 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2995 CGF.EmitOMPSimdInit(S, IsMonotonic);
2996 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2997 if (C->getKind() == OMPC_ORDER_concurrent)
2998 CGF.LoopStack.setParallel(/*Enable=*/true);
2999 }
3000 },
3001 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3002 &S, ScheduleKind, LoopExit,
3003 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3004 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3005 // When no chunk_size is specified, the iteration space is divided
3006 // into chunks that are approximately equal in size, and at most
3007 // one chunk is distributed to each thread. Note that the size of
3008 // the chunks is unspecified in this case.
3009 CGOpenMPRuntime::StaticRTInput StaticInit(
3010 IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3011 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3012 StaticChunkedOne ? Chunk : nullptr);
3013 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3014 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3015 StaticInit);
3016 // UB = min(UB, GlobalUB);
3017 if (!StaticChunkedOne)
3018 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3019 // IV = LB;
3020 CGF.EmitIgnoredExpr(S.getInit());
3021 // For unchunked static schedule generate:
3022 //
3023 // while (idx <= UB) {
3024 // BODY;
3025 // ++idx;
3026 // }
3027 //
3028 // For static schedule with chunk one:
3029 //
3030 // while (IV <= PrevUB) {
3031 // BODY;
3032 // IV += ST;
3033 // }
3034 CGF.EmitOMPInnerLoop(
3035 S, LoopScope.requiresCleanups(),
3036 StaticChunkedOne ? S.getCombinedParForInDistCond()
3037 : S.getCond(),
3038 StaticChunkedOne ? S.getDistInc() : S.getInc(),
3039 [&S, LoopExit](CodeGenFunction &CGF) {
3040 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3041 },
3042 [](CodeGenFunction &) {});
3043 });
3044 EmitBlock(LoopExit.getBlock());
3045 // Tell the runtime we are done.
3046 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3047 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3048 S.getDirectiveKind());
3049 };
3050 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3051 } else {
3052 // Emit the outer loop, which requests its work chunk [LB..UB] from
3053 // runtime and runs the inner loop to process it.
3054 const OMPLoopArguments LoopArguments(
3055 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3056 IL.getAddress(*this), Chunk, EUB);
3057 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3058 LoopArguments, CGDispatchBounds);
3059 }
3060 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3061 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3062 return CGF.Builder.CreateIsNotNull(
3063 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3064 });
3065 }
3066 EmitOMPReductionClauseFinal(
3067 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3068 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3069 : /*Parallel only*/ OMPD_parallel);
3070 // Emit post-update of the reduction variables if IsLastIter != 0.
3071 emitPostUpdateForReductionClause(
3072 *this, S, [IL, &S](CodeGenFunction &CGF) {
3073 return CGF.Builder.CreateIsNotNull(
3074 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3075 });
3076 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3077 if (HasLastprivateClause)
3078 EmitOMPLastprivateClauseFinal(
3079 S, isOpenMPSimdDirective(S.getDirectiveKind()),
3080 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3081 }
3082 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3083 return CGF.Builder.CreateIsNotNull(
3084 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3085 });
3086 DoacrossCleanupScope.ForceCleanup();
3087 // We're now done with the loop, so jump to the continuation block.
3088 if (ContBlock) {
3089 EmitBranch(ContBlock);
3090 EmitBlock(ContBlock, /*IsFinished=*/true);
3091 }
3092 }
3093 return HasLastprivateClause;
3094 }
3095
3096 /// The following two functions generate expressions for the loop lower
3097 /// and upper bounds in case of static and dynamic (dispatch) schedule
3098 /// of the associated 'for' or 'distribute' loop.
3099 static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)3100 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3101 const auto &LS = cast<OMPLoopDirective>(S);
3102 LValue LB =
3103 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3104 LValue UB =
3105 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3106 return {LB, UB};
3107 }
3108
3109 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3110 /// consider the lower and upper bound expressions generated by the
3111 /// worksharing loop support, but we use 0 and the iteration space size as
3112 /// constants
3113 static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)3114 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3115 Address LB, Address UB) {
3116 const auto &LS = cast<OMPLoopDirective>(S);
3117 const Expr *IVExpr = LS.getIterationVariable();
3118 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3119 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3120 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3121 return {LBVal, UBVal};
3122 }
3123
3124 /// Emits the code for the directive with inscan reductions.
3125 /// The code is the following:
3126 /// \code
3127 /// size num_iters = <num_iters>;
3128 /// <type> buffer[num_iters];
3129 /// #pragma omp ...
3130 /// for (i: 0..<num_iters>) {
3131 /// <input phase>;
3132 /// buffer[i] = red;
3133 /// }
3134 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3135 /// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3136 /// buffer[i] op= buffer[i-pow(2,k)];
3137 /// #pragma omp ...
3138 /// for (0..<num_iters>) {
3139 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3140 /// <scan phase>;
3141 /// }
3142 /// \endcode
emitScanBasedDirective(CodeGenFunction & CGF,const OMPLoopDirective & S,llvm::function_ref<llvm::Value * (CodeGenFunction &)> NumIteratorsGen,llvm::function_ref<void (CodeGenFunction &)> FirstGen,llvm::function_ref<void (CodeGenFunction &)> SecondGen)3143 static void emitScanBasedDirective(
3144 CodeGenFunction &CGF, const OMPLoopDirective &S,
3145 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3146 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3147 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3148 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3149 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3150 SmallVector<const Expr *, 4> Shareds;
3151 SmallVector<const Expr *, 4> Privates;
3152 SmallVector<const Expr *, 4> ReductionOps;
3153 SmallVector<const Expr *, 4> LHSs;
3154 SmallVector<const Expr *, 4> RHSs;
3155 SmallVector<const Expr *, 4> CopyOps;
3156 SmallVector<const Expr *, 4> CopyArrayTemps;
3157 SmallVector<const Expr *, 4> CopyArrayElems;
3158 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3159 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3160 "Only inscan reductions are expected.");
3161 Shareds.append(C->varlist_begin(), C->varlist_end());
3162 Privates.append(C->privates().begin(), C->privates().end());
3163 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3164 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3165 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3166 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3167 CopyArrayTemps.append(C->copy_array_temps().begin(),
3168 C->copy_array_temps().end());
3169 CopyArrayElems.append(C->copy_array_elems().begin(),
3170 C->copy_array_elems().end());
3171 }
3172 {
3173 // Emit buffers for each reduction variables.
3174 // ReductionCodeGen is required to emit correctly the code for array
3175 // reductions.
3176 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3177 unsigned Count = 0;
3178 auto *ITA = CopyArrayTemps.begin();
3179 for (const Expr *IRef : Privates) {
3180 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3181 // Emit variably modified arrays, used for arrays/array sections
3182 // reductions.
3183 if (PrivateVD->getType()->isVariablyModifiedType()) {
3184 RedCG.emitSharedOrigLValue(CGF, Count);
3185 RedCG.emitAggregateType(CGF, Count);
3186 }
3187 CodeGenFunction::OpaqueValueMapping DimMapping(
3188 CGF,
3189 cast<OpaqueValueExpr>(
3190 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3191 ->getSizeExpr()),
3192 RValue::get(OMPScanNumIterations));
3193 // Emit temp buffer.
3194 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3195 ++ITA;
3196 ++Count;
3197 }
3198 }
3199 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3200 {
3201 // Emit loop with input phase:
3202 // #pragma omp ...
3203 // for (i: 0..<num_iters>) {
3204 // <input phase>;
3205 // buffer[i] = red;
3206 // }
3207 CGF.OMPFirstScanLoop = true;
3208 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3209 FirstGen(CGF);
3210 }
3211 // Emit prefix reduction:
3212 // for (int k = 0; k <= ceil(log2(n)); ++k)
3213 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3214 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3215 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3216 llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3217 llvm::Value *Arg =
3218 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3219 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3220 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3221 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3222 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3223 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3224 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3225 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3226 CGF.EmitBlock(LoopBB);
3227 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3228 // size pow2k = 1;
3229 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3230 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3231 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3232 // for (size i = n - 1; i >= 2 ^ k; --i)
3233 // tmp[i] op= tmp[i-pow2k];
3234 llvm::BasicBlock *InnerLoopBB =
3235 CGF.createBasicBlock("omp.inner.log.scan.body");
3236 llvm::BasicBlock *InnerExitBB =
3237 CGF.createBasicBlock("omp.inner.log.scan.exit");
3238 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3239 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3240 CGF.EmitBlock(InnerLoopBB);
3241 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3242 IVal->addIncoming(NMin1, LoopBB);
3243 {
3244 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3245 auto *ILHS = LHSs.begin();
3246 auto *IRHS = RHSs.begin();
3247 for (const Expr *CopyArrayElem : CopyArrayElems) {
3248 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3249 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3250 Address LHSAddr = Address::invalid();
3251 {
3252 CodeGenFunction::OpaqueValueMapping IdxMapping(
3253 CGF,
3254 cast<OpaqueValueExpr>(
3255 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3256 RValue::get(IVal));
3257 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3258 }
3259 PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3260 Address RHSAddr = Address::invalid();
3261 {
3262 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3263 CodeGenFunction::OpaqueValueMapping IdxMapping(
3264 CGF,
3265 cast<OpaqueValueExpr>(
3266 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3267 RValue::get(OffsetIVal));
3268 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3269 }
3270 PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3271 ++ILHS;
3272 ++IRHS;
3273 }
3274 PrivScope.Privatize();
3275 CGF.CGM.getOpenMPRuntime().emitReduction(
3276 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3277 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3278 }
3279 llvm::Value *NextIVal =
3280 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3281 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3282 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3283 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3284 CGF.EmitBlock(InnerExitBB);
3285 llvm::Value *Next =
3286 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3287 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3288 // pow2k <<= 1;
3289 llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3290 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3291 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3292 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3293 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3294 CGF.EmitBlock(ExitBB);
3295
3296 CGF.OMPFirstScanLoop = false;
3297 SecondGen(CGF);
3298 }
3299
emitWorksharingDirective(CodeGenFunction & CGF,const OMPLoopDirective & S,bool HasCancel)3300 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3301 const OMPLoopDirective &S,
3302 bool HasCancel) {
3303 bool HasLastprivates;
3304 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3305 [](const OMPReductionClause *C) {
3306 return C->getModifier() == OMPC_REDUCTION_inscan;
3307 })) {
3308 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3309 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3310 OMPLoopScope LoopScope(CGF, S);
3311 return CGF.EmitScalarExpr(S.getNumIterations());
3312 };
3313 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3314 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3315 CGF, S.getDirectiveKind(), HasCancel);
3316 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3317 emitForLoopBounds,
3318 emitDispatchForLoopBounds);
3319 // Emit an implicit barrier at the end.
3320 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3321 OMPD_for);
3322 };
3323 const auto &&SecondGen = [&S, HasCancel,
3324 &HasLastprivates](CodeGenFunction &CGF) {
3325 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3326 CGF, S.getDirectiveKind(), HasCancel);
3327 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3328 emitForLoopBounds,
3329 emitDispatchForLoopBounds);
3330 };
3331 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3332 } else {
3333 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3334 HasCancel);
3335 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3336 emitForLoopBounds,
3337 emitDispatchForLoopBounds);
3338 }
3339 return HasLastprivates;
3340 }
3341
EmitOMPForDirective(const OMPForDirective & S)3342 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3343 bool HasLastprivates = false;
3344 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3345 PrePostActionTy &) {
3346 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3347 };
3348 {
3349 auto LPCRegion =
3350 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3351 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3352 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3353 S.hasCancel());
3354 }
3355
3356 // Emit an implicit barrier at the end.
3357 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3358 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3359 // Check for outer lastprivate conditional update.
3360 checkForLastprivateConditionalUpdate(*this, S);
3361 }
3362
EmitOMPForSimdDirective(const OMPForSimdDirective & S)3363 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3364 bool HasLastprivates = false;
3365 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3366 PrePostActionTy &) {
3367 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3368 };
3369 {
3370 auto LPCRegion =
3371 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3372 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3373 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3374 }
3375
3376 // Emit an implicit barrier at the end.
3377 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3378 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3379 // Check for outer lastprivate conditional update.
3380 checkForLastprivateConditionalUpdate(*this, S);
3381 }
3382
createSectionLVal(CodeGenFunction & CGF,QualType Ty,const Twine & Name,llvm::Value * Init=nullptr)3383 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3384 const Twine &Name,
3385 llvm::Value *Init = nullptr) {
3386 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3387 if (Init)
3388 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3389 return LVal;
3390 }
3391
EmitSections(const OMPExecutableDirective & S)3392 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3393 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3394 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3395 bool HasLastprivates = false;
3396 auto &&CodeGen = [&S, CapturedStmt, CS,
3397 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3398 const ASTContext &C = CGF.getContext();
3399 QualType KmpInt32Ty =
3400 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3401 // Emit helper vars inits.
3402 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3403 CGF.Builder.getInt32(0));
3404 llvm::ConstantInt *GlobalUBVal = CS != nullptr
3405 ? CGF.Builder.getInt32(CS->size() - 1)
3406 : CGF.Builder.getInt32(0);
3407 LValue UB =
3408 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3409 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3410 CGF.Builder.getInt32(1));
3411 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3412 CGF.Builder.getInt32(0));
3413 // Loop counter.
3414 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3415 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3416 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3417 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3418 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3419 // Generate condition for loop.
3420 BinaryOperator *Cond = BinaryOperator::Create(
3421 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
3422 S.getBeginLoc(), FPOptionsOverride());
3423 // Increment for loop counter.
3424 UnaryOperator *Inc = UnaryOperator::Create(
3425 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
3426 S.getBeginLoc(), true, FPOptionsOverride());
3427 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3428 // Iterate through all sections and emit a switch construct:
3429 // switch (IV) {
3430 // case 0:
3431 // <SectionStmt[0]>;
3432 // break;
3433 // ...
3434 // case <NumSection> - 1:
3435 // <SectionStmt[<NumSection> - 1]>;
3436 // break;
3437 // }
3438 // .omp.sections.exit:
3439 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3440 llvm::SwitchInst *SwitchStmt =
3441 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3442 ExitBB, CS == nullptr ? 1 : CS->size());
3443 if (CS) {
3444 unsigned CaseNumber = 0;
3445 for (const Stmt *SubStmt : CS->children()) {
3446 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3447 CGF.EmitBlock(CaseBB);
3448 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3449 CGF.EmitStmt(SubStmt);
3450 CGF.EmitBranch(ExitBB);
3451 ++CaseNumber;
3452 }
3453 } else {
3454 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3455 CGF.EmitBlock(CaseBB);
3456 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3457 CGF.EmitStmt(CapturedStmt);
3458 CGF.EmitBranch(ExitBB);
3459 }
3460 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3461 };
3462
3463 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3464 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3465 // Emit implicit barrier to synchronize threads and avoid data races on
3466 // initialization of firstprivate variables and post-update of lastprivate
3467 // variables.
3468 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3469 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3470 /*ForceSimpleCall=*/true);
3471 }
3472 CGF.EmitOMPPrivateClause(S, LoopScope);
3473 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3474 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3475 CGF.EmitOMPReductionClauseInit(S, LoopScope);
3476 (void)LoopScope.Privatize();
3477 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3478 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3479
3480 // Emit static non-chunked loop.
3481 OpenMPScheduleTy ScheduleKind;
3482 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3483 CGOpenMPRuntime::StaticRTInput StaticInit(
3484 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3485 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3486 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3487 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3488 // UB = min(UB, GlobalUB);
3489 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3490 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3491 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3492 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3493 // IV = LB;
3494 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3495 // while (idx <= UB) { BODY; ++idx; }
3496 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3497 [](CodeGenFunction &) {});
3498 // Tell the runtime we are done.
3499 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3500 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3501 S.getDirectiveKind());
3502 };
3503 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3504 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3505 // Emit post-update of the reduction variables if IsLastIter != 0.
3506 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3507 return CGF.Builder.CreateIsNotNull(
3508 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3509 });
3510
3511 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3512 if (HasLastprivates)
3513 CGF.EmitOMPLastprivateClauseFinal(
3514 S, /*NoFinals=*/false,
3515 CGF.Builder.CreateIsNotNull(
3516 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3517 };
3518
3519 bool HasCancel = false;
3520 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3521 HasCancel = OSD->hasCancel();
3522 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3523 HasCancel = OPSD->hasCancel();
3524 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3525 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3526 HasCancel);
3527 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3528 // clause. Otherwise the barrier will be generated by the codegen for the
3529 // directive.
3530 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3531 // Emit implicit barrier to synchronize threads and avoid data races on
3532 // initialization of firstprivate variables.
3533 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3534 OMPD_unknown);
3535 }
3536 }
3537
EmitOMPSectionsDirective(const OMPSectionsDirective & S)3538 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3539 {
3540 auto LPCRegion =
3541 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3542 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3543 EmitSections(S);
3544 }
3545 // Emit an implicit barrier at the end.
3546 if (!S.getSingleClause<OMPNowaitClause>()) {
3547 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3548 OMPD_sections);
3549 }
3550 // Check for outer lastprivate conditional update.
3551 checkForLastprivateConditionalUpdate(*this, S);
3552 }
3553
EmitOMPSectionDirective(const OMPSectionDirective & S)3554 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3555 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3556 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3557 };
3558 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3559 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
3560 S.hasCancel());
3561 }
3562
EmitOMPSingleDirective(const OMPSingleDirective & S)3563 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3564 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3565 llvm::SmallVector<const Expr *, 8> DestExprs;
3566 llvm::SmallVector<const Expr *, 8> SrcExprs;
3567 llvm::SmallVector<const Expr *, 8> AssignmentOps;
3568 // Check if there are any 'copyprivate' clauses associated with this
3569 // 'single' construct.
3570 // Build a list of copyprivate variables along with helper expressions
3571 // (<source>, <destination>, <destination>=<source> expressions)
3572 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3573 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3574 DestExprs.append(C->destination_exprs().begin(),
3575 C->destination_exprs().end());
3576 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3577 AssignmentOps.append(C->assignment_ops().begin(),
3578 C->assignment_ops().end());
3579 }
3580 // Emit code for 'single' region along with 'copyprivate' clauses
3581 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3582 Action.Enter(CGF);
3583 OMPPrivateScope SingleScope(CGF);
3584 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
3585 CGF.EmitOMPPrivateClause(S, SingleScope);
3586 (void)SingleScope.Privatize();
3587 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3588 };
3589 {
3590 auto LPCRegion =
3591 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3592 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3593 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
3594 CopyprivateVars, DestExprs,
3595 SrcExprs, AssignmentOps);
3596 }
3597 // Emit an implicit barrier at the end (to avoid data race on firstprivate
3598 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
3599 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
3600 CGM.getOpenMPRuntime().emitBarrierCall(
3601 *this, S.getBeginLoc(),
3602 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
3603 }
3604 // Check for outer lastprivate conditional update.
3605 checkForLastprivateConditionalUpdate(*this, S);
3606 }
3607
emitMaster(CodeGenFunction & CGF,const OMPExecutableDirective & S)3608 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3609 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3610 Action.Enter(CGF);
3611 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3612 };
3613 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3614 }
3615
EmitOMPMasterDirective(const OMPMasterDirective & S)3616 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3617 if (CGM.getLangOpts().OpenMPIRBuilder) {
3618 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3619 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3620
3621 const CapturedStmt *CS = S.getInnermostCapturedStmt();
3622 const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt();
3623
3624 auto FiniCB = [this](InsertPointTy IP) {
3625 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3626 };
3627
3628 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
3629 InsertPointTy CodeGenIP,
3630 llvm::BasicBlock &FiniBB) {
3631 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3632 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3633 CodeGenIP, FiniBB);
3634 };
3635
3636 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
3637 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
3638 Builder.restoreIP(OMPBuilder.CreateMaster(Builder, BodyGenCB, FiniCB));
3639
3640 return;
3641 }
3642 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3643 emitMaster(*this, S);
3644 }
3645
EmitOMPCriticalDirective(const OMPCriticalDirective & S)3646 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
3647 if (CGM.getLangOpts().OpenMPIRBuilder) {
3648 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3649 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3650
3651 const CapturedStmt *CS = S.getInnermostCapturedStmt();
3652 const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt();
3653 const Expr *Hint = nullptr;
3654 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3655 Hint = HintClause->getHint();
3656
3657 // TODO: This is slightly different from what's currently being done in
3658 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
3659 // about typing is final.
3660 llvm::Value *HintInst = nullptr;
3661 if (Hint)
3662 HintInst =
3663 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
3664
3665 auto FiniCB = [this](InsertPointTy IP) {
3666 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3667 };
3668
3669 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
3670 InsertPointTy CodeGenIP,
3671 llvm::BasicBlock &FiniBB) {
3672 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3673 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
3674 CodeGenIP, FiniBB);
3675 };
3676
3677 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
3678 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
3679 Builder.restoreIP(OMPBuilder.CreateCritical(
3680 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
3681 HintInst));
3682
3683 return;
3684 }
3685
3686 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3687 Action.Enter(CGF);
3688 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3689 };
3690 const Expr *Hint = nullptr;
3691 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3692 Hint = HintClause->getHint();
3693 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3694 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
3695 S.getDirectiveName().getAsString(),
3696 CodeGen, S.getBeginLoc(), Hint);
3697 }
3698
EmitOMPParallelForDirective(const OMPParallelForDirective & S)3699 void CodeGenFunction::EmitOMPParallelForDirective(
3700 const OMPParallelForDirective &S) {
3701 // Emit directive as a combined directive that consists of two implicit
3702 // directives: 'parallel' with 'for' directive.
3703 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3704 Action.Enter(CGF);
3705 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
3706 };
3707 {
3708 auto LPCRegion =
3709 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3710 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
3711 emitEmptyBoundParameters);
3712 }
3713 // Check for outer lastprivate conditional update.
3714 checkForLastprivateConditionalUpdate(*this, S);
3715 }
3716
EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective & S)3717 void CodeGenFunction::EmitOMPParallelForSimdDirective(
3718 const OMPParallelForSimdDirective &S) {
3719 // Emit directive as a combined directive that consists of two implicit
3720 // directives: 'parallel' with 'for' directive.
3721 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3722 Action.Enter(CGF);
3723 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3724 };
3725 {
3726 auto LPCRegion =
3727 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3728 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
3729 emitEmptyBoundParameters);
3730 }
3731 // Check for outer lastprivate conditional update.
3732 checkForLastprivateConditionalUpdate(*this, S);
3733 }
3734
EmitOMPParallelMasterDirective(const OMPParallelMasterDirective & S)3735 void CodeGenFunction::EmitOMPParallelMasterDirective(
3736 const OMPParallelMasterDirective &S) {
3737 // Emit directive as a combined directive that consists of two implicit
3738 // directives: 'parallel' with 'master' directive.
3739 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3740 Action.Enter(CGF);
3741 OMPPrivateScope PrivateScope(CGF);
3742 bool Copyins = CGF.EmitOMPCopyinClause(S);
3743 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3744 if (Copyins) {
3745 // Emit implicit barrier to synchronize threads and avoid data races on
3746 // propagation master's thread values of threadprivate variables to local
3747 // instances of that variables of all other implicit threads.
3748 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3749 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3750 /*ForceSimpleCall=*/true);
3751 }
3752 CGF.EmitOMPPrivateClause(S, PrivateScope);
3753 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3754 (void)PrivateScope.Privatize();
3755 emitMaster(CGF, S);
3756 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3757 };
3758 {
3759 auto LPCRegion =
3760 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3761 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
3762 emitEmptyBoundParameters);
3763 emitPostUpdateForReductionClause(*this, S,
3764 [](CodeGenFunction &) { return nullptr; });
3765 }
3766 // Check for outer lastprivate conditional update.
3767 checkForLastprivateConditionalUpdate(*this, S);
3768 }
3769
EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective & S)3770 void CodeGenFunction::EmitOMPParallelSectionsDirective(
3771 const OMPParallelSectionsDirective &S) {
3772 // Emit directive as a combined directive that consists of two implicit
3773 // directives: 'parallel' with 'sections' directive.
3774 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3775 Action.Enter(CGF);
3776 CGF.EmitSections(S);
3777 };
3778 {
3779 auto LPCRegion =
3780 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3781 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
3782 emitEmptyBoundParameters);
3783 }
3784 // Check for outer lastprivate conditional update.
3785 checkForLastprivateConditionalUpdate(*this, S);
3786 }
3787
EmitOMPTaskBasedDirective(const OMPExecutableDirective & S,const OpenMPDirectiveKind CapturedRegion,const RegionCodeGenTy & BodyGen,const TaskGenTy & TaskGen,OMPTaskDataTy & Data)3788 void CodeGenFunction::EmitOMPTaskBasedDirective(
3789 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
3790 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
3791 OMPTaskDataTy &Data) {
3792 // Emit outlined function for task construct.
3793 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
3794 auto I = CS->getCapturedDecl()->param_begin();
3795 auto PartId = std::next(I);
3796 auto TaskT = std::next(I, 4);
3797 // Check if the task is final
3798 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
3799 // If the condition constant folds and can be elided, try to avoid emitting
3800 // the condition and the dead arm of the if/else.
3801 const Expr *Cond = Clause->getCondition();
3802 bool CondConstant;
3803 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
3804 Data.Final.setInt(CondConstant);
3805 else
3806 Data.Final.setPointer(EvaluateExprAsBool(Cond));
3807 } else {
3808 // By default the task is not final.
3809 Data.Final.setInt(/*IntVal=*/false);
3810 }
3811 // Check if the task has 'priority' clause.
3812 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
3813 const Expr *Prio = Clause->getPriority();
3814 Data.Priority.setInt(/*IntVal=*/true);
3815 Data.Priority.setPointer(EmitScalarConversion(
3816 EmitScalarExpr(Prio), Prio->getType(),
3817 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
3818 Prio->getExprLoc()));
3819 }
3820 // The first function argument for tasks is a thread id, the second one is a
3821 // part id (0 for tied tasks, >=0 for untied task).
3822 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
3823 // Get list of private variables.
3824 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
3825 auto IRef = C->varlist_begin();
3826 for (const Expr *IInit : C->private_copies()) {
3827 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3828 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3829 Data.PrivateVars.push_back(*IRef);
3830 Data.PrivateCopies.push_back(IInit);
3831 }
3832 ++IRef;
3833 }
3834 }
3835 EmittedAsPrivate.clear();
3836 // Get list of firstprivate variables.
3837 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
3838 auto IRef = C->varlist_begin();
3839 auto IElemInitRef = C->inits().begin();
3840 for (const Expr *IInit : C->private_copies()) {
3841 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3842 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3843 Data.FirstprivateVars.push_back(*IRef);
3844 Data.FirstprivateCopies.push_back(IInit);
3845 Data.FirstprivateInits.push_back(*IElemInitRef);
3846 }
3847 ++IRef;
3848 ++IElemInitRef;
3849 }
3850 }
3851 // Get list of lastprivate variables (for taskloops).
3852 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
3853 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
3854 auto IRef = C->varlist_begin();
3855 auto ID = C->destination_exprs().begin();
3856 for (const Expr *IInit : C->private_copies()) {
3857 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3858 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3859 Data.LastprivateVars.push_back(*IRef);
3860 Data.LastprivateCopies.push_back(IInit);
3861 }
3862 LastprivateDstsOrigs.insert(
3863 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
3864 cast<DeclRefExpr>(*IRef)});
3865 ++IRef;
3866 ++ID;
3867 }
3868 }
3869 SmallVector<const Expr *, 4> LHSs;
3870 SmallVector<const Expr *, 4> RHSs;
3871 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3872 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
3873 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
3874 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
3875 Data.ReductionOps.append(C->reduction_ops().begin(),
3876 C->reduction_ops().end());
3877 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3878 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3879 }
3880 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
3881 *this, S.getBeginLoc(), LHSs, RHSs, Data);
3882 // Build list of dependences.
3883 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
3884 OMPTaskDataTy::DependData &DD =
3885 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
3886 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
3887 }
3888 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
3889 CapturedRegion](CodeGenFunction &CGF,
3890 PrePostActionTy &Action) {
3891 // Set proper addresses for generated private copies.
3892 OMPPrivateScope Scope(CGF);
3893 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
3894 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3895 !Data.LastprivateVars.empty()) {
3896 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3897 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3898 enum { PrivatesParam = 2, CopyFnParam = 3 };
3899 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3900 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3901 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3902 CS->getCapturedDecl()->getParam(PrivatesParam)));
3903 // Map privates.
3904 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3905 llvm::SmallVector<llvm::Value *, 16> CallArgs;
3906 CallArgs.push_back(PrivatesPtr);
3907 for (const Expr *E : Data.PrivateVars) {
3908 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3909 Address PrivatePtr = CGF.CreateMemTemp(
3910 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
3911 PrivatePtrs.emplace_back(VD, PrivatePtr);
3912 CallArgs.push_back(PrivatePtr.getPointer());
3913 }
3914 for (const Expr *E : Data.FirstprivateVars) {
3915 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3916 Address PrivatePtr =
3917 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3918 ".firstpriv.ptr.addr");
3919 PrivatePtrs.emplace_back(VD, PrivatePtr);
3920 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
3921 CallArgs.push_back(PrivatePtr.getPointer());
3922 }
3923 for (const Expr *E : Data.LastprivateVars) {
3924 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3925 Address PrivatePtr =
3926 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3927 ".lastpriv.ptr.addr");
3928 PrivatePtrs.emplace_back(VD, PrivatePtr);
3929 CallArgs.push_back(PrivatePtr.getPointer());
3930 }
3931 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3932 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3933 for (const auto &Pair : LastprivateDstsOrigs) {
3934 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
3935 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
3936 /*RefersToEnclosingVariableOrCapture=*/
3937 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
3938 Pair.second->getType(), VK_LValue,
3939 Pair.second->getExprLoc());
3940 Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
3941 return CGF.EmitLValue(&DRE).getAddress(CGF);
3942 });
3943 }
3944 for (const auto &Pair : PrivatePtrs) {
3945 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3946 CGF.getContext().getDeclAlign(Pair.first));
3947 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3948 }
3949 }
3950 if (Data.Reductions) {
3951 OMPPrivateScope FirstprivateScope(CGF);
3952 for (const auto &Pair : FirstprivatePtrs) {
3953 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3954 CGF.getContext().getDeclAlign(Pair.first));
3955 FirstprivateScope.addPrivate(Pair.first,
3956 [Replacement]() { return Replacement; });
3957 }
3958 (void)FirstprivateScope.Privatize();
3959 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
3960 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
3961 Data.ReductionCopies, Data.ReductionOps);
3962 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
3963 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
3964 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
3965 RedCG.emitSharedOrigLValue(CGF, Cnt);
3966 RedCG.emitAggregateType(CGF, Cnt);
3967 // FIXME: This must removed once the runtime library is fixed.
3968 // Emit required threadprivate variables for
3969 // initializer/combiner/finalizer.
3970 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
3971 RedCG, Cnt);
3972 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
3973 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
3974 Replacement =
3975 Address(CGF.EmitScalarConversion(
3976 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
3977 CGF.getContext().getPointerType(
3978 Data.ReductionCopies[Cnt]->getType()),
3979 Data.ReductionCopies[Cnt]->getExprLoc()),
3980 Replacement.getAlignment());
3981 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
3982 Scope.addPrivate(RedCG.getBaseDecl(Cnt),
3983 [Replacement]() { return Replacement; });
3984 }
3985 }
3986 // Privatize all private variables except for in_reduction items.
3987 (void)Scope.Privatize();
3988 SmallVector<const Expr *, 4> InRedVars;
3989 SmallVector<const Expr *, 4> InRedPrivs;
3990 SmallVector<const Expr *, 4> InRedOps;
3991 SmallVector<const Expr *, 4> TaskgroupDescriptors;
3992 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
3993 auto IPriv = C->privates().begin();
3994 auto IRed = C->reduction_ops().begin();
3995 auto ITD = C->taskgroup_descriptors().begin();
3996 for (const Expr *Ref : C->varlists()) {
3997 InRedVars.emplace_back(Ref);
3998 InRedPrivs.emplace_back(*IPriv);
3999 InRedOps.emplace_back(*IRed);
4000 TaskgroupDescriptors.emplace_back(*ITD);
4001 std::advance(IPriv, 1);
4002 std::advance(IRed, 1);
4003 std::advance(ITD, 1);
4004 }
4005 }
4006 // Privatize in_reduction items here, because taskgroup descriptors must be
4007 // privatized earlier.
4008 OMPPrivateScope InRedScope(CGF);
4009 if (!InRedVars.empty()) {
4010 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4011 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4012 RedCG.emitSharedOrigLValue(CGF, Cnt);
4013 RedCG.emitAggregateType(CGF, Cnt);
4014 // The taskgroup descriptor variable is always implicit firstprivate and
4015 // privatized already during processing of the firstprivates.
4016 // FIXME: This must removed once the runtime library is fixed.
4017 // Emit required threadprivate variables for
4018 // initializer/combiner/finalizer.
4019 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4020 RedCG, Cnt);
4021 llvm::Value *ReductionsPtr;
4022 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4023 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4024 TRExpr->getExprLoc());
4025 } else {
4026 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4027 }
4028 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4029 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4030 Replacement = Address(
4031 CGF.EmitScalarConversion(
4032 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4033 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4034 InRedPrivs[Cnt]->getExprLoc()),
4035 Replacement.getAlignment());
4036 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4037 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4038 [Replacement]() { return Replacement; });
4039 }
4040 }
4041 (void)InRedScope.Privatize();
4042
4043 Action.Enter(CGF);
4044 BodyGen(CGF);
4045 };
4046 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4047 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4048 Data.NumberOfParts);
4049 OMPLexicalScope Scope(*this, S, llvm::None,
4050 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4051 !isOpenMPSimdDirective(S.getDirectiveKind()));
4052 TaskGen(*this, OutlinedFn, Data);
4053 }
4054
4055 static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext & C,OMPTaskDataTy & Data,QualType Ty,CapturedDecl * CD,SourceLocation Loc)4056 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4057 QualType Ty, CapturedDecl *CD,
4058 SourceLocation Loc) {
4059 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4060 ImplicitParamDecl::Other);
4061 auto *OrigRef = DeclRefExpr::Create(
4062 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4063 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4064 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4065 ImplicitParamDecl::Other);
4066 auto *PrivateRef = DeclRefExpr::Create(
4067 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4068 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4069 QualType ElemType = C.getBaseElementType(Ty);
4070 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4071 ImplicitParamDecl::Other);
4072 auto *InitRef = DeclRefExpr::Create(
4073 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4074 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4075 PrivateVD->setInitStyle(VarDecl::CInit);
4076 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4077 InitRef, /*BasePath=*/nullptr,
4078 VK_RValue));
4079 Data.FirstprivateVars.emplace_back(OrigRef);
4080 Data.FirstprivateCopies.emplace_back(PrivateRef);
4081 Data.FirstprivateInits.emplace_back(InitRef);
4082 return OrigVD;
4083 }
4084
EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective & S,const RegionCodeGenTy & BodyGen,OMPTargetDataInfo & InputInfo)4085 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4086 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4087 OMPTargetDataInfo &InputInfo) {
4088 // Emit outlined function for task construct.
4089 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4090 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4091 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4092 auto I = CS->getCapturedDecl()->param_begin();
4093 auto PartId = std::next(I);
4094 auto TaskT = std::next(I, 4);
4095 OMPTaskDataTy Data;
4096 // The task is not final.
4097 Data.Final.setInt(/*IntVal=*/false);
4098 // Get list of firstprivate variables.
4099 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4100 auto IRef = C->varlist_begin();
4101 auto IElemInitRef = C->inits().begin();
4102 for (auto *IInit : C->private_copies()) {
4103 Data.FirstprivateVars.push_back(*IRef);
4104 Data.FirstprivateCopies.push_back(IInit);
4105 Data.FirstprivateInits.push_back(*IElemInitRef);
4106 ++IRef;
4107 ++IElemInitRef;
4108 }
4109 }
4110 OMPPrivateScope TargetScope(*this);
4111 VarDecl *BPVD = nullptr;
4112 VarDecl *PVD = nullptr;
4113 VarDecl *SVD = nullptr;
4114 if (InputInfo.NumberOfTargetItems > 0) {
4115 auto *CD = CapturedDecl::Create(
4116 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4117 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4118 QualType BaseAndPointersType = getContext().getConstantArrayType(
4119 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4120 /*IndexTypeQuals=*/0);
4121 BPVD = createImplicitFirstprivateForType(
4122 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
4123 PVD = createImplicitFirstprivateForType(
4124 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
4125 QualType SizesType = getContext().getConstantArrayType(
4126 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4127 ArrSize, nullptr, ArrayType::Normal,
4128 /*IndexTypeQuals=*/0);
4129 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4130 S.getBeginLoc());
4131 TargetScope.addPrivate(
4132 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4133 TargetScope.addPrivate(PVD,
4134 [&InputInfo]() { return InputInfo.PointersArray; });
4135 TargetScope.addPrivate(SVD,
4136 [&InputInfo]() { return InputInfo.SizesArray; });
4137 }
4138 (void)TargetScope.Privatize();
4139 // Build list of dependences.
4140 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4141 OMPTaskDataTy::DependData &DD =
4142 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4143 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4144 }
4145 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
4146 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4147 // Set proper addresses for generated private copies.
4148 OMPPrivateScope Scope(CGF);
4149 if (!Data.FirstprivateVars.empty()) {
4150 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
4151 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
4152 enum { PrivatesParam = 2, CopyFnParam = 3 };
4153 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4154 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4155 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4156 CS->getCapturedDecl()->getParam(PrivatesParam)));
4157 // Map privates.
4158 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4159 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4160 CallArgs.push_back(PrivatesPtr);
4161 for (const Expr *E : Data.FirstprivateVars) {
4162 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4163 Address PrivatePtr =
4164 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4165 ".firstpriv.ptr.addr");
4166 PrivatePtrs.emplace_back(VD, PrivatePtr);
4167 CallArgs.push_back(PrivatePtr.getPointer());
4168 }
4169 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4170 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4171 for (const auto &Pair : PrivatePtrs) {
4172 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4173 CGF.getContext().getDeclAlign(Pair.first));
4174 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4175 }
4176 }
4177 // Privatize all private variables except for in_reduction items.
4178 (void)Scope.Privatize();
4179 if (InputInfo.NumberOfTargetItems > 0) {
4180 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4181 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4182 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4183 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4184 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4185 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4186 }
4187
4188 Action.Enter(CGF);
4189 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4190 BodyGen(CGF);
4191 };
4192 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4193 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4194 Data.NumberOfParts);
4195 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4196 IntegerLiteral IfCond(getContext(), TrueOrFalse,
4197 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4198 SourceLocation());
4199
4200 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4201 SharedsTy, CapturedStruct, &IfCond, Data);
4202 }
4203
EmitOMPTaskDirective(const OMPTaskDirective & S)4204 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4205 // Emit outlined function for task construct.
4206 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4207 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4208 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4209 const Expr *IfCond = nullptr;
4210 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4211 if (C->getNameModifier() == OMPD_unknown ||
4212 C->getNameModifier() == OMPD_task) {
4213 IfCond = C->getCondition();
4214 break;
4215 }
4216 }
4217
4218 OMPTaskDataTy Data;
4219 // Check if we should emit tied or untied task.
4220 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4221 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4222 CGF.EmitStmt(CS->getCapturedStmt());
4223 };
4224 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4225 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4226 const OMPTaskDataTy &Data) {
4227 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4228 SharedsTy, CapturedStruct, IfCond,
4229 Data);
4230 };
4231 auto LPCRegion =
4232 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4233 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4234 }
4235
EmitOMPTaskyieldDirective(const OMPTaskyieldDirective & S)4236 void CodeGenFunction::EmitOMPTaskyieldDirective(
4237 const OMPTaskyieldDirective &S) {
4238 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4239 }
4240
EmitOMPBarrierDirective(const OMPBarrierDirective & S)4241 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4242 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4243 }
4244
EmitOMPTaskwaitDirective(const OMPTaskwaitDirective & S)4245 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4246 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4247 }
4248
EmitOMPTaskgroupDirective(const OMPTaskgroupDirective & S)4249 void CodeGenFunction::EmitOMPTaskgroupDirective(
4250 const OMPTaskgroupDirective &S) {
4251 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4252 Action.Enter(CGF);
4253 if (const Expr *E = S.getReductionRef()) {
4254 SmallVector<const Expr *, 4> LHSs;
4255 SmallVector<const Expr *, 4> RHSs;
4256 OMPTaskDataTy Data;
4257 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4258 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4259 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4260 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4261 Data.ReductionOps.append(C->reduction_ops().begin(),
4262 C->reduction_ops().end());
4263 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4264 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4265 }
4266 llvm::Value *ReductionDesc =
4267 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4268 LHSs, RHSs, Data);
4269 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4270 CGF.EmitVarDecl(*VD);
4271 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4272 /*Volatile=*/false, E->getType());
4273 }
4274 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4275 };
4276 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4277 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4278 }
4279
EmitOMPFlushDirective(const OMPFlushDirective & S)4280 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4281 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4282 ? llvm::AtomicOrdering::NotAtomic
4283 : llvm::AtomicOrdering::AcquireRelease;
4284 CGM.getOpenMPRuntime().emitFlush(
4285 *this,
4286 [&S]() -> ArrayRef<const Expr *> {
4287 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4288 return llvm::makeArrayRef(FlushClause->varlist_begin(),
4289 FlushClause->varlist_end());
4290 return llvm::None;
4291 }(),
4292 S.getBeginLoc(), AO);
4293 }
4294
EmitOMPDepobjDirective(const OMPDepobjDirective & S)4295 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4296 const auto *DO = S.getSingleClause<OMPDepobjClause>();
4297 LValue DOLVal = EmitLValue(DO->getDepobj());
4298 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4299 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4300 DC->getModifier());
4301 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4302 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4303 *this, Dependencies, DC->getBeginLoc());
4304 EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4305 return;
4306 }
4307 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4308 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4309 return;
4310 }
4311 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4312 CGM.getOpenMPRuntime().emitUpdateClause(
4313 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4314 return;
4315 }
4316 }
4317
EmitOMPScanDirective(const OMPScanDirective & S)4318 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4319 if (!OMPParentLoopDirectiveForScan)
4320 return;
4321 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4322 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4323 SmallVector<const Expr *, 4> Shareds;
4324 SmallVector<const Expr *, 4> Privates;
4325 SmallVector<const Expr *, 4> LHSs;
4326 SmallVector<const Expr *, 4> RHSs;
4327 SmallVector<const Expr *, 4> ReductionOps;
4328 SmallVector<const Expr *, 4> CopyOps;
4329 SmallVector<const Expr *, 4> CopyArrayTemps;
4330 SmallVector<const Expr *, 4> CopyArrayElems;
4331 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4332 if (C->getModifier() != OMPC_REDUCTION_inscan)
4333 continue;
4334 Shareds.append(C->varlist_begin(), C->varlist_end());
4335 Privates.append(C->privates().begin(), C->privates().end());
4336 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4337 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4338 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4339 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4340 CopyArrayTemps.append(C->copy_array_temps().begin(),
4341 C->copy_array_temps().end());
4342 CopyArrayElems.append(C->copy_array_elems().begin(),
4343 C->copy_array_elems().end());
4344 }
4345 if (ParentDir.getDirectiveKind() == OMPD_simd ||
4346 (getLangOpts().OpenMPSimd &&
4347 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4348 // For simd directive and simd-based directives in simd only mode, use the
4349 // following codegen:
4350 // int x = 0;
4351 // #pragma omp simd reduction(inscan, +: x)
4352 // for (..) {
4353 // <first part>
4354 // #pragma omp scan inclusive(x)
4355 // <second part>
4356 // }
4357 // is transformed to:
4358 // int x = 0;
4359 // for (..) {
4360 // int x_priv = 0;
4361 // <first part>
4362 // x = x_priv + x;
4363 // x_priv = x;
4364 // <second part>
4365 // }
4366 // and
4367 // int x = 0;
4368 // #pragma omp simd reduction(inscan, +: x)
4369 // for (..) {
4370 // <first part>
4371 // #pragma omp scan exclusive(x)
4372 // <second part>
4373 // }
4374 // to
4375 // int x = 0;
4376 // for (..) {
4377 // int x_priv = 0;
4378 // <second part>
4379 // int temp = x;
4380 // x = x_priv + x;
4381 // x_priv = temp;
4382 // <first part>
4383 // }
4384 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4385 EmitBranch(IsInclusive
4386 ? OMPScanReduce
4387 : BreakContinueStack.back().ContinueBlock.getBlock());
4388 EmitBlock(OMPScanDispatch);
4389 {
4390 // New scope for correct construction/destruction of temp variables for
4391 // exclusive scan.
4392 LexicalScope Scope(*this, S.getSourceRange());
4393 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4394 EmitBlock(OMPScanReduce);
4395 if (!IsInclusive) {
4396 // Create temp var and copy LHS value to this temp value.
4397 // TMP = LHS;
4398 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4399 const Expr *PrivateExpr = Privates[I];
4400 const Expr *TempExpr = CopyArrayTemps[I];
4401 EmitAutoVarDecl(
4402 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4403 LValue DestLVal = EmitLValue(TempExpr);
4404 LValue SrcLVal = EmitLValue(LHSs[I]);
4405 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4406 SrcLVal.getAddress(*this),
4407 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4408 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4409 CopyOps[I]);
4410 }
4411 }
4412 CGM.getOpenMPRuntime().emitReduction(
4413 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4414 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
4415 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4416 const Expr *PrivateExpr = Privates[I];
4417 LValue DestLVal;
4418 LValue SrcLVal;
4419 if (IsInclusive) {
4420 DestLVal = EmitLValue(RHSs[I]);
4421 SrcLVal = EmitLValue(LHSs[I]);
4422 } else {
4423 const Expr *TempExpr = CopyArrayTemps[I];
4424 DestLVal = EmitLValue(RHSs[I]);
4425 SrcLVal = EmitLValue(TempExpr);
4426 }
4427 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4428 SrcLVal.getAddress(*this),
4429 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4430 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4431 CopyOps[I]);
4432 }
4433 }
4434 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4435 OMPScanExitBlock = IsInclusive
4436 ? BreakContinueStack.back().ContinueBlock.getBlock()
4437 : OMPScanReduce;
4438 EmitBlock(OMPAfterScanBlock);
4439 return;
4440 }
4441 if (!IsInclusive) {
4442 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4443 EmitBlock(OMPScanExitBlock);
4444 }
4445 if (OMPFirstScanLoop) {
4446 // Emit buffer[i] = red; at the end of the input phase.
4447 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4448 .getIterationVariable()
4449 ->IgnoreParenImpCasts();
4450 LValue IdxLVal = EmitLValue(IVExpr);
4451 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4452 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4453 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4454 const Expr *PrivateExpr = Privates[I];
4455 const Expr *OrigExpr = Shareds[I];
4456 const Expr *CopyArrayElem = CopyArrayElems[I];
4457 OpaqueValueMapping IdxMapping(
4458 *this,
4459 cast<OpaqueValueExpr>(
4460 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4461 RValue::get(IdxVal));
4462 LValue DestLVal = EmitLValue(CopyArrayElem);
4463 LValue SrcLVal = EmitLValue(OrigExpr);
4464 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4465 SrcLVal.getAddress(*this),
4466 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4467 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4468 CopyOps[I]);
4469 }
4470 }
4471 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4472 if (IsInclusive) {
4473 EmitBlock(OMPScanExitBlock);
4474 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4475 }
4476 EmitBlock(OMPScanDispatch);
4477 if (!OMPFirstScanLoop) {
4478 // Emit red = buffer[i]; at the entrance to the scan phase.
4479 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4480 .getIterationVariable()
4481 ->IgnoreParenImpCasts();
4482 LValue IdxLVal = EmitLValue(IVExpr);
4483 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4484 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4485 llvm::BasicBlock *ExclusiveExitBB = nullptr;
4486 if (!IsInclusive) {
4487 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
4488 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
4489 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
4490 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
4491 EmitBlock(ContBB);
4492 // Use idx - 1 iteration for exclusive scan.
4493 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
4494 }
4495 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4496 const Expr *PrivateExpr = Privates[I];
4497 const Expr *OrigExpr = Shareds[I];
4498 const Expr *CopyArrayElem = CopyArrayElems[I];
4499 OpaqueValueMapping IdxMapping(
4500 *this,
4501 cast<OpaqueValueExpr>(
4502 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4503 RValue::get(IdxVal));
4504 LValue SrcLVal = EmitLValue(CopyArrayElem);
4505 LValue DestLVal = EmitLValue(OrigExpr);
4506 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4507 SrcLVal.getAddress(*this),
4508 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4509 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4510 CopyOps[I]);
4511 }
4512 if (!IsInclusive) {
4513 EmitBlock(ExclusiveExitBB);
4514 }
4515 }
4516 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
4517 : OMPAfterScanBlock);
4518 EmitBlock(OMPAfterScanBlock);
4519 }
4520
EmitOMPDistributeLoop(const OMPLoopDirective & S,const CodeGenLoopTy & CodeGenLoop,Expr * IncExpr)4521 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
4522 const CodeGenLoopTy &CodeGenLoop,
4523 Expr *IncExpr) {
4524 // Emit the loop iteration variable.
4525 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
4526 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
4527 EmitVarDecl(*IVDecl);
4528
4529 // Emit the iterations count variable.
4530 // If it is not a variable, Sema decided to calculate iterations count on each
4531 // iteration (e.g., it is foldable into a constant).
4532 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4533 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4534 // Emit calculation of the iterations count.
4535 EmitIgnoredExpr(S.getCalcLastIteration());
4536 }
4537
4538 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
4539
4540 bool HasLastprivateClause = false;
4541 // Check pre-condition.
4542 {
4543 OMPLoopScope PreInitScope(*this, S);
4544 // Skip the entire loop if we don't meet the precondition.
4545 // If the condition constant folds and can be elided, avoid emitting the
4546 // whole loop.
4547 bool CondConstant;
4548 llvm::BasicBlock *ContBlock = nullptr;
4549 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4550 if (!CondConstant)
4551 return;
4552 } else {
4553 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
4554 ContBlock = createBasicBlock("omp.precond.end");
4555 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
4556 getProfileCount(&S));
4557 EmitBlock(ThenBlock);
4558 incrementProfileCounter(&S);
4559 }
4560
4561 emitAlignedClause(*this, S);
4562 // Emit 'then' code.
4563 {
4564 // Emit helper vars inits.
4565
4566 LValue LB = EmitOMPHelperVar(
4567 *this, cast<DeclRefExpr>(
4568 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4569 ? S.getCombinedLowerBoundVariable()
4570 : S.getLowerBoundVariable())));
4571 LValue UB = EmitOMPHelperVar(
4572 *this, cast<DeclRefExpr>(
4573 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4574 ? S.getCombinedUpperBoundVariable()
4575 : S.getUpperBoundVariable())));
4576 LValue ST =
4577 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
4578 LValue IL =
4579 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
4580
4581 OMPPrivateScope LoopScope(*this);
4582 if (EmitOMPFirstprivateClause(S, LoopScope)) {
4583 // Emit implicit barrier to synchronize threads and avoid data races
4584 // on initialization of firstprivate variables and post-update of
4585 // lastprivate variables.
4586 CGM.getOpenMPRuntime().emitBarrierCall(
4587 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4588 /*ForceSimpleCall=*/true);
4589 }
4590 EmitOMPPrivateClause(S, LoopScope);
4591 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
4592 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4593 !isOpenMPTeamsDirective(S.getDirectiveKind()))
4594 EmitOMPReductionClauseInit(S, LoopScope);
4595 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
4596 EmitOMPPrivateLoopCounters(S, LoopScope);
4597 (void)LoopScope.Privatize();
4598 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4599 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
4600
4601 // Detect the distribute schedule kind and chunk.
4602 llvm::Value *Chunk = nullptr;
4603 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
4604 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
4605 ScheduleKind = C->getDistScheduleKind();
4606 if (const Expr *Ch = C->getChunkSize()) {
4607 Chunk = EmitScalarExpr(Ch);
4608 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
4609 S.getIterationVariable()->getType(),
4610 S.getBeginLoc());
4611 }
4612 } else {
4613 // Default behaviour for dist_schedule clause.
4614 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
4615 *this, S, ScheduleKind, Chunk);
4616 }
4617 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
4618 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
4619
4620 // OpenMP [2.10.8, distribute Construct, Description]
4621 // If dist_schedule is specified, kind must be static. If specified,
4622 // iterations are divided into chunks of size chunk_size, chunks are
4623 // assigned to the teams of the league in a round-robin fashion in the
4624 // order of the team number. When no chunk_size is specified, the
4625 // iteration space is divided into chunks that are approximately equal
4626 // in size, and at most one chunk is distributed to each team of the
4627 // league. The size of the chunks is unspecified in this case.
4628 bool StaticChunked = RT.isStaticChunked(
4629 ScheduleKind, /* Chunked */ Chunk != nullptr) &&
4630 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
4631 if (RT.isStaticNonchunked(ScheduleKind,
4632 /* Chunked */ Chunk != nullptr) ||
4633 StaticChunked) {
4634 CGOpenMPRuntime::StaticRTInput StaticInit(
4635 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
4636 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
4637 StaticChunked ? Chunk : nullptr);
4638 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
4639 StaticInit);
4640 JumpDest LoopExit =
4641 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
4642 // UB = min(UB, GlobalUB);
4643 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4644 ? S.getCombinedEnsureUpperBound()
4645 : S.getEnsureUpperBound());
4646 // IV = LB;
4647 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4648 ? S.getCombinedInit()
4649 : S.getInit());
4650
4651 const Expr *Cond =
4652 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4653 ? S.getCombinedCond()
4654 : S.getCond();
4655
4656 if (StaticChunked)
4657 Cond = S.getCombinedDistCond();
4658
4659 // For static unchunked schedules generate:
4660 //
4661 // 1. For distribute alone, codegen
4662 // while (idx <= UB) {
4663 // BODY;
4664 // ++idx;
4665 // }
4666 //
4667 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
4668 // while (idx <= UB) {
4669 // <CodeGen rest of pragma>(LB, UB);
4670 // idx += ST;
4671 // }
4672 //
4673 // For static chunk one schedule generate:
4674 //
4675 // while (IV <= GlobalUB) {
4676 // <CodeGen rest of pragma>(LB, UB);
4677 // LB += ST;
4678 // UB += ST;
4679 // UB = min(UB, GlobalUB);
4680 // IV = LB;
4681 // }
4682 //
4683 emitCommonSimdLoop(
4684 *this, S,
4685 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4686 if (isOpenMPSimdDirective(S.getDirectiveKind()))
4687 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
4688 },
4689 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
4690 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
4691 CGF.EmitOMPInnerLoop(
4692 S, LoopScope.requiresCleanups(), Cond, IncExpr,
4693 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
4694 CodeGenLoop(CGF, S, LoopExit);
4695 },
4696 [&S, StaticChunked](CodeGenFunction &CGF) {
4697 if (StaticChunked) {
4698 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
4699 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
4700 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
4701 CGF.EmitIgnoredExpr(S.getCombinedInit());
4702 }
4703 });
4704 });
4705 EmitBlock(LoopExit.getBlock());
4706 // Tell the runtime we are done.
4707 RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
4708 } else {
4709 // Emit the outer loop, which requests its work chunk [LB..UB] from
4710 // runtime and runs the inner loop to process it.
4711 const OMPLoopArguments LoopArguments = {
4712 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
4713 IL.getAddress(*this), Chunk};
4714 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
4715 CodeGenLoop);
4716 }
4717 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
4718 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
4719 return CGF.Builder.CreateIsNotNull(
4720 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4721 });
4722 }
4723 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
4724 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4725 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
4726 EmitOMPReductionClauseFinal(S, OMPD_simd);
4727 // Emit post-update of the reduction variables if IsLastIter != 0.
4728 emitPostUpdateForReductionClause(
4729 *this, S, [IL, &S](CodeGenFunction &CGF) {
4730 return CGF.Builder.CreateIsNotNull(
4731 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4732 });
4733 }
4734 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4735 if (HasLastprivateClause) {
4736 EmitOMPLastprivateClauseFinal(
4737 S, /*NoFinals=*/false,
4738 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
4739 }
4740 }
4741
4742 // We're now done with the loop, so jump to the continuation block.
4743 if (ContBlock) {
4744 EmitBranch(ContBlock);
4745 EmitBlock(ContBlock, true);
4746 }
4747 }
4748 }
4749
EmitOMPDistributeDirective(const OMPDistributeDirective & S)4750 void CodeGenFunction::EmitOMPDistributeDirective(
4751 const OMPDistributeDirective &S) {
4752 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4753 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4754 };
4755 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4756 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
4757 }
4758
emitOutlinedOrderedFunction(CodeGenModule & CGM,const CapturedStmt * S,SourceLocation Loc)4759 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
4760 const CapturedStmt *S,
4761 SourceLocation Loc) {
4762 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
4763 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
4764 CGF.CapturedStmtInfo = &CapStmtInfo;
4765 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
4766 Fn->setDoesNotRecurse();
4767 return Fn;
4768 }
4769
EmitOMPOrderedDirective(const OMPOrderedDirective & S)4770 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
4771 if (S.hasClausesOfKind<OMPDependClause>()) {
4772 assert(!S.getAssociatedStmt() &&
4773 "No associated statement must be in ordered depend construct.");
4774 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
4775 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
4776 return;
4777 }
4778 const auto *C = S.getSingleClause<OMPSIMDClause>();
4779 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
4780 PrePostActionTy &Action) {
4781 const CapturedStmt *CS = S.getInnermostCapturedStmt();
4782 if (C) {
4783 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4784 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4785 llvm::Function *OutlinedFn =
4786 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
4787 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
4788 OutlinedFn, CapturedVars);
4789 } else {
4790 Action.Enter(CGF);
4791 CGF.EmitStmt(CS->getCapturedStmt());
4792 }
4793 };
4794 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4795 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
4796 }
4797
convertToScalarValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)4798 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
4799 QualType SrcType, QualType DestType,
4800 SourceLocation Loc) {
4801 assert(CGF.hasScalarEvaluationKind(DestType) &&
4802 "DestType must have scalar evaluation kind.");
4803 assert(!Val.isAggregate() && "Must be a scalar or complex.");
4804 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
4805 DestType, Loc)
4806 : CGF.EmitComplexToScalarConversion(
4807 Val.getComplexVal(), SrcType, DestType, Loc);
4808 }
4809
4810 static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)4811 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
4812 QualType DestType, SourceLocation Loc) {
4813 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
4814 "DestType must have complex evaluation kind.");
4815 CodeGenFunction::ComplexPairTy ComplexVal;
4816 if (Val.isScalar()) {
4817 // Convert the input element to the element type of the complex.
4818 QualType DestElementType =
4819 DestType->castAs<ComplexType>()->getElementType();
4820 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
4821 Val.getScalarVal(), SrcType, DestElementType, Loc);
4822 ComplexVal = CodeGenFunction::ComplexPairTy(
4823 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
4824 } else {
4825 assert(Val.isComplex() && "Must be a scalar or complex.");
4826 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
4827 QualType DestElementType =
4828 DestType->castAs<ComplexType>()->getElementType();
4829 ComplexVal.first = CGF.EmitScalarConversion(
4830 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
4831 ComplexVal.second = CGF.EmitScalarConversion(
4832 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
4833 }
4834 return ComplexVal;
4835 }
4836
emitSimpleAtomicStore(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,RValue RVal)4837 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4838 LValue LVal, RValue RVal) {
4839 if (LVal.isGlobalReg())
4840 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
4841 else
4842 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
4843 }
4844
emitSimpleAtomicLoad(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,SourceLocation Loc)4845 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
4846 llvm::AtomicOrdering AO, LValue LVal,
4847 SourceLocation Loc) {
4848 if (LVal.isGlobalReg())
4849 return CGF.EmitLoadOfLValue(LVal, Loc);
4850 return CGF.EmitAtomicLoad(
4851 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
4852 LVal.isVolatile());
4853 }
4854
emitOMPSimpleStore(LValue LVal,RValue RVal,QualType RValTy,SourceLocation Loc)4855 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
4856 QualType RValTy, SourceLocation Loc) {
4857 switch (getEvaluationKind(LVal.getType())) {
4858 case TEK_Scalar:
4859 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
4860 *this, RVal, RValTy, LVal.getType(), Loc)),
4861 LVal);
4862 break;
4863 case TEK_Complex:
4864 EmitStoreOfComplex(
4865 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
4866 /*isInit=*/false);
4867 break;
4868 case TEK_Aggregate:
4869 llvm_unreachable("Must be a scalar or complex.");
4870 }
4871 }
4872
emitOMPAtomicReadExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * V,SourceLocation Loc)4873 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4874 const Expr *X, const Expr *V,
4875 SourceLocation Loc) {
4876 // v = x;
4877 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
4878 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
4879 LValue XLValue = CGF.EmitLValue(X);
4880 LValue VLValue = CGF.EmitLValue(V);
4881 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
4882 // OpenMP, 2.17.7, atomic Construct
4883 // If the read or capture clause is specified and the acquire, acq_rel, or
4884 // seq_cst clause is specified then the strong flush on exit from the atomic
4885 // operation is also an acquire flush.
4886 switch (AO) {
4887 case llvm::AtomicOrdering::Acquire:
4888 case llvm::AtomicOrdering::AcquireRelease:
4889 case llvm::AtomicOrdering::SequentiallyConsistent:
4890 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4891 llvm::AtomicOrdering::Acquire);
4892 break;
4893 case llvm::AtomicOrdering::Monotonic:
4894 case llvm::AtomicOrdering::Release:
4895 break;
4896 case llvm::AtomicOrdering::NotAtomic:
4897 case llvm::AtomicOrdering::Unordered:
4898 llvm_unreachable("Unexpected ordering.");
4899 }
4900 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
4901 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
4902 }
4903
emitOMPAtomicWriteExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * E,SourceLocation Loc)4904 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
4905 llvm::AtomicOrdering AO, const Expr *X,
4906 const Expr *E, SourceLocation Loc) {
4907 // x = expr;
4908 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
4909 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
4910 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
4911 // OpenMP, 2.17.7, atomic Construct
4912 // If the write, update, or capture clause is specified and the release,
4913 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
4914 // the atomic operation is also a release flush.
4915 switch (AO) {
4916 case llvm::AtomicOrdering::Release:
4917 case llvm::AtomicOrdering::AcquireRelease:
4918 case llvm::AtomicOrdering::SequentiallyConsistent:
4919 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4920 llvm::AtomicOrdering::Release);
4921 break;
4922 case llvm::AtomicOrdering::Acquire:
4923 case llvm::AtomicOrdering::Monotonic:
4924 break;
4925 case llvm::AtomicOrdering::NotAtomic:
4926 case llvm::AtomicOrdering::Unordered:
4927 llvm_unreachable("Unexpected ordering.");
4928 }
4929 }
4930
emitOMPAtomicRMW(CodeGenFunction & CGF,LValue X,RValue Update,BinaryOperatorKind BO,llvm::AtomicOrdering AO,bool IsXLHSInRHSPart)4931 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
4932 RValue Update,
4933 BinaryOperatorKind BO,
4934 llvm::AtomicOrdering AO,
4935 bool IsXLHSInRHSPart) {
4936 ASTContext &Context = CGF.getContext();
4937 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
4938 // expression is simple and atomic is allowed for the given type for the
4939 // target platform.
4940 if (BO == BO_Comma || !Update.isScalar() ||
4941 !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
4942 (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
4943 (Update.getScalarVal()->getType() !=
4944 X.getAddress(CGF).getElementType())) ||
4945 !X.getAddress(CGF).getElementType()->isIntegerTy() ||
4946 !Context.getTargetInfo().hasBuiltinAtomic(
4947 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
4948 return std::make_pair(false, RValue::get(nullptr));
4949
4950 llvm::AtomicRMWInst::BinOp RMWOp;
4951 switch (BO) {
4952 case BO_Add:
4953 RMWOp = llvm::AtomicRMWInst::Add;
4954 break;
4955 case BO_Sub:
4956 if (!IsXLHSInRHSPart)
4957 return std::make_pair(false, RValue::get(nullptr));
4958 RMWOp = llvm::AtomicRMWInst::Sub;
4959 break;
4960 case BO_And:
4961 RMWOp = llvm::AtomicRMWInst::And;
4962 break;
4963 case BO_Or:
4964 RMWOp = llvm::AtomicRMWInst::Or;
4965 break;
4966 case BO_Xor:
4967 RMWOp = llvm::AtomicRMWInst::Xor;
4968 break;
4969 case BO_LT:
4970 RMWOp = X.getType()->hasSignedIntegerRepresentation()
4971 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
4972 : llvm::AtomicRMWInst::Max)
4973 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
4974 : llvm::AtomicRMWInst::UMax);
4975 break;
4976 case BO_GT:
4977 RMWOp = X.getType()->hasSignedIntegerRepresentation()
4978 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
4979 : llvm::AtomicRMWInst::Min)
4980 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
4981 : llvm::AtomicRMWInst::UMin);
4982 break;
4983 case BO_Assign:
4984 RMWOp = llvm::AtomicRMWInst::Xchg;
4985 break;
4986 case BO_Mul:
4987 case BO_Div:
4988 case BO_Rem:
4989 case BO_Shl:
4990 case BO_Shr:
4991 case BO_LAnd:
4992 case BO_LOr:
4993 return std::make_pair(false, RValue::get(nullptr));
4994 case BO_PtrMemD:
4995 case BO_PtrMemI:
4996 case BO_LE:
4997 case BO_GE:
4998 case BO_EQ:
4999 case BO_NE:
5000 case BO_Cmp:
5001 case BO_AddAssign:
5002 case BO_SubAssign:
5003 case BO_AndAssign:
5004 case BO_OrAssign:
5005 case BO_XorAssign:
5006 case BO_MulAssign:
5007 case BO_DivAssign:
5008 case BO_RemAssign:
5009 case BO_ShlAssign:
5010 case BO_ShrAssign:
5011 case BO_Comma:
5012 llvm_unreachable("Unsupported atomic update operation");
5013 }
5014 llvm::Value *UpdateVal = Update.getScalarVal();
5015 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
5016 UpdateVal = CGF.Builder.CreateIntCast(
5017 IC, X.getAddress(CGF).getElementType(),
5018 X.getType()->hasSignedIntegerRepresentation());
5019 }
5020 llvm::Value *Res =
5021 CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
5022 return std::make_pair(true, RValue::get(Res));
5023 }
5024
EmitOMPAtomicSimpleUpdateExpr(LValue X,RValue E,BinaryOperatorKind BO,bool IsXLHSInRHSPart,llvm::AtomicOrdering AO,SourceLocation Loc,const llvm::function_ref<RValue (RValue)> CommonGen)5025 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
5026 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
5027 llvm::AtomicOrdering AO, SourceLocation Loc,
5028 const llvm::function_ref<RValue(RValue)> CommonGen) {
5029 // Update expressions are allowed to have the following forms:
5030 // x binop= expr; -> xrval + expr;
5031 // x++, ++x -> xrval + 1;
5032 // x--, --x -> xrval - 1;
5033 // x = x binop expr; -> xrval binop expr
5034 // x = expr Op x; - > expr binop xrval;
5035 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
5036 if (!Res.first) {
5037 if (X.isGlobalReg()) {
5038 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
5039 // 'xrval'.
5040 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
5041 } else {
5042 // Perform compare-and-swap procedure.
5043 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
5044 }
5045 }
5046 return Res;
5047 }
5048
emitOMPAtomicUpdateExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,const Expr * X,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)5049 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
5050 llvm::AtomicOrdering AO, const Expr *X,
5051 const Expr *E, const Expr *UE,
5052 bool IsXLHSInRHSPart, SourceLocation Loc) {
5053 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5054 "Update expr in 'atomic update' must be a binary operator.");
5055 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5056 // Update expressions are allowed to have the following forms:
5057 // x binop= expr; -> xrval + expr;
5058 // x++, ++x -> xrval + 1;
5059 // x--, --x -> xrval - 1;
5060 // x = x binop expr; -> xrval binop expr
5061 // x = expr Op x; - > expr binop xrval;
5062 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
5063 LValue XLValue = CGF.EmitLValue(X);
5064 RValue ExprRValue = CGF.EmitAnyExpr(E);
5065 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5066 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5067 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5068 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5069 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
5070 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5071 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5072 return CGF.EmitAnyExpr(UE);
5073 };
5074 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5075 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5076 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5077 // OpenMP, 2.17.7, atomic Construct
5078 // If the write, update, or capture clause is specified and the release,
5079 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5080 // the atomic operation is also a release flush.
5081 switch (AO) {
5082 case llvm::AtomicOrdering::Release:
5083 case llvm::AtomicOrdering::AcquireRelease:
5084 case llvm::AtomicOrdering::SequentiallyConsistent:
5085 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5086 llvm::AtomicOrdering::Release);
5087 break;
5088 case llvm::AtomicOrdering::Acquire:
5089 case llvm::AtomicOrdering::Monotonic:
5090 break;
5091 case llvm::AtomicOrdering::NotAtomic:
5092 case llvm::AtomicOrdering::Unordered:
5093 llvm_unreachable("Unexpected ordering.");
5094 }
5095 }
5096
convertToType(CodeGenFunction & CGF,RValue Value,QualType SourceType,QualType ResType,SourceLocation Loc)5097 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
5098 QualType SourceType, QualType ResType,
5099 SourceLocation Loc) {
5100 switch (CGF.getEvaluationKind(ResType)) {
5101 case TEK_Scalar:
5102 return RValue::get(
5103 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
5104 case TEK_Complex: {
5105 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
5106 return RValue::getComplex(Res.first, Res.second);
5107 }
5108 case TEK_Aggregate:
5109 break;
5110 }
5111 llvm_unreachable("Must be a scalar or complex.");
5112 }
5113
emitOMPAtomicCaptureExpr(CodeGenFunction & CGF,llvm::AtomicOrdering AO,bool IsPostfixUpdate,const Expr * V,const Expr * X,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)5114 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
5115 llvm::AtomicOrdering AO,
5116 bool IsPostfixUpdate, const Expr *V,
5117 const Expr *X, const Expr *E,
5118 const Expr *UE, bool IsXLHSInRHSPart,
5119 SourceLocation Loc) {
5120 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
5121 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
5122 RValue NewVVal;
5123 LValue VLValue = CGF.EmitLValue(V);
5124 LValue XLValue = CGF.EmitLValue(X);
5125 RValue ExprRValue = CGF.EmitAnyExpr(E);
5126 QualType NewVValType;
5127 if (UE) {
5128 // 'x' is updated with some additional value.
5129 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5130 "Update expr in 'atomic capture' must be a binary operator.");
5131 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5132 // Update expressions are allowed to have the following forms:
5133 // x binop= expr; -> xrval + expr;
5134 // x++, ++x -> xrval + 1;
5135 // x--, --x -> xrval - 1;
5136 // x = x binop expr; -> xrval binop expr
5137 // x = expr Op x; - > expr binop xrval;
5138 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5139 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5140 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5141 NewVValType = XRValExpr->getType();
5142 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5143 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
5144 IsPostfixUpdate](RValue XRValue) {
5145 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5146 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5147 RValue Res = CGF.EmitAnyExpr(UE);
5148 NewVVal = IsPostfixUpdate ? XRValue : Res;
5149 return Res;
5150 };
5151 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5152 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5153 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5154 if (Res.first) {
5155 // 'atomicrmw' instruction was generated.
5156 if (IsPostfixUpdate) {
5157 // Use old value from 'atomicrmw'.
5158 NewVVal = Res.second;
5159 } else {
5160 // 'atomicrmw' does not provide new value, so evaluate it using old
5161 // value of 'x'.
5162 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5163 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
5164 NewVVal = CGF.EmitAnyExpr(UE);
5165 }
5166 }
5167 } else {
5168 // 'x' is simply rewritten with some 'expr'.
5169 NewVValType = X->getType().getNonReferenceType();
5170 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
5171 X->getType().getNonReferenceType(), Loc);
5172 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
5173 NewVVal = XRValue;
5174 return ExprRValue;
5175 };
5176 // Try to perform atomicrmw xchg, otherwise simple exchange.
5177 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
5178 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
5179 Loc, Gen);
5180 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5181 if (Res.first) {
5182 // 'atomicrmw' instruction was generated.
5183 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
5184 }
5185 }
5186 // Emit post-update store to 'v' of old/new 'x' value.
5187 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
5188 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
5189 // OpenMP, 2.17.7, atomic Construct
5190 // If the write, update, or capture clause is specified and the release,
5191 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5192 // the atomic operation is also a release flush.
5193 // If the read or capture clause is specified and the acquire, acq_rel, or
5194 // seq_cst clause is specified then the strong flush on exit from the atomic
5195 // operation is also an acquire flush.
5196 switch (AO) {
5197 case llvm::AtomicOrdering::Release:
5198 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5199 llvm::AtomicOrdering::Release);
5200 break;
5201 case llvm::AtomicOrdering::Acquire:
5202 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5203 llvm::AtomicOrdering::Acquire);
5204 break;
5205 case llvm::AtomicOrdering::AcquireRelease:
5206 case llvm::AtomicOrdering::SequentiallyConsistent:
5207 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5208 llvm::AtomicOrdering::AcquireRelease);
5209 break;
5210 case llvm::AtomicOrdering::Monotonic:
5211 break;
5212 case llvm::AtomicOrdering::NotAtomic:
5213 case llvm::AtomicOrdering::Unordered:
5214 llvm_unreachable("Unexpected ordering.");
5215 }
5216 }
5217
emitOMPAtomicExpr(CodeGenFunction & CGF,OpenMPClauseKind Kind,llvm::AtomicOrdering AO,bool IsPostfixUpdate,const Expr * X,const Expr * V,const Expr * E,const Expr * UE,bool IsXLHSInRHSPart,SourceLocation Loc)5218 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
5219 llvm::AtomicOrdering AO, bool IsPostfixUpdate,
5220 const Expr *X, const Expr *V, const Expr *E,
5221 const Expr *UE, bool IsXLHSInRHSPart,
5222 SourceLocation Loc) {
5223 switch (Kind) {
5224 case OMPC_read:
5225 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
5226 break;
5227 case OMPC_write:
5228 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
5229 break;
5230 case OMPC_unknown:
5231 case OMPC_update:
5232 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
5233 break;
5234 case OMPC_capture:
5235 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
5236 IsXLHSInRHSPart, Loc);
5237 break;
5238 case OMPC_if:
5239 case OMPC_final:
5240 case OMPC_num_threads:
5241 case OMPC_private:
5242 case OMPC_firstprivate:
5243 case OMPC_lastprivate:
5244 case OMPC_reduction:
5245 case OMPC_task_reduction:
5246 case OMPC_in_reduction:
5247 case OMPC_safelen:
5248 case OMPC_simdlen:
5249 case OMPC_allocator:
5250 case OMPC_allocate:
5251 case OMPC_collapse:
5252 case OMPC_default:
5253 case OMPC_seq_cst:
5254 case OMPC_acq_rel:
5255 case OMPC_acquire:
5256 case OMPC_release:
5257 case OMPC_relaxed:
5258 case OMPC_shared:
5259 case OMPC_linear:
5260 case OMPC_aligned:
5261 case OMPC_copyin:
5262 case OMPC_copyprivate:
5263 case OMPC_flush:
5264 case OMPC_depobj:
5265 case OMPC_proc_bind:
5266 case OMPC_schedule:
5267 case OMPC_ordered:
5268 case OMPC_nowait:
5269 case OMPC_untied:
5270 case OMPC_threadprivate:
5271 case OMPC_depend:
5272 case OMPC_mergeable:
5273 case OMPC_device:
5274 case OMPC_threads:
5275 case OMPC_simd:
5276 case OMPC_map:
5277 case OMPC_num_teams:
5278 case OMPC_thread_limit:
5279 case OMPC_priority:
5280 case OMPC_grainsize:
5281 case OMPC_nogroup:
5282 case OMPC_num_tasks:
5283 case OMPC_hint:
5284 case OMPC_dist_schedule:
5285 case OMPC_defaultmap:
5286 case OMPC_uniform:
5287 case OMPC_to:
5288 case OMPC_from:
5289 case OMPC_use_device_ptr:
5290 case OMPC_use_device_addr:
5291 case OMPC_is_device_ptr:
5292 case OMPC_unified_address:
5293 case OMPC_unified_shared_memory:
5294 case OMPC_reverse_offload:
5295 case OMPC_dynamic_allocators:
5296 case OMPC_atomic_default_mem_order:
5297 case OMPC_device_type:
5298 case OMPC_match:
5299 case OMPC_nontemporal:
5300 case OMPC_order:
5301 case OMPC_destroy:
5302 case OMPC_detach:
5303 case OMPC_inclusive:
5304 case OMPC_exclusive:
5305 case OMPC_uses_allocators:
5306 case OMPC_affinity:
5307 default:
5308 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
5309 }
5310 }
5311
EmitOMPAtomicDirective(const OMPAtomicDirective & S)5312 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
5313 llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
5314 bool MemOrderingSpecified = false;
5315 if (S.getSingleClause<OMPSeqCstClause>()) {
5316 AO = llvm::AtomicOrdering::SequentiallyConsistent;
5317 MemOrderingSpecified = true;
5318 } else if (S.getSingleClause<OMPAcqRelClause>()) {
5319 AO = llvm::AtomicOrdering::AcquireRelease;
5320 MemOrderingSpecified = true;
5321 } else if (S.getSingleClause<OMPAcquireClause>()) {
5322 AO = llvm::AtomicOrdering::Acquire;
5323 MemOrderingSpecified = true;
5324 } else if (S.getSingleClause<OMPReleaseClause>()) {
5325 AO = llvm::AtomicOrdering::Release;
5326 MemOrderingSpecified = true;
5327 } else if (S.getSingleClause<OMPRelaxedClause>()) {
5328 AO = llvm::AtomicOrdering::Monotonic;
5329 MemOrderingSpecified = true;
5330 }
5331 OpenMPClauseKind Kind = OMPC_unknown;
5332 for (const OMPClause *C : S.clauses()) {
5333 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
5334 // if it is first).
5335 if (C->getClauseKind() != OMPC_seq_cst &&
5336 C->getClauseKind() != OMPC_acq_rel &&
5337 C->getClauseKind() != OMPC_acquire &&
5338 C->getClauseKind() != OMPC_release &&
5339 C->getClauseKind() != OMPC_relaxed) {
5340 Kind = C->getClauseKind();
5341 break;
5342 }
5343 }
5344 if (!MemOrderingSpecified) {
5345 llvm::AtomicOrdering DefaultOrder =
5346 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
5347 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
5348 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
5349 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
5350 Kind == OMPC_capture)) {
5351 AO = DefaultOrder;
5352 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
5353 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
5354 AO = llvm::AtomicOrdering::Release;
5355 } else if (Kind == OMPC_read) {
5356 assert(Kind == OMPC_read && "Unexpected atomic kind.");
5357 AO = llvm::AtomicOrdering::Acquire;
5358 }
5359 }
5360 }
5361
5362 const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
5363
5364 auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
5365 PrePostActionTy &) {
5366 CGF.EmitStopPoint(CS);
5367 emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
5368 S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
5369 S.getBeginLoc());
5370 };
5371 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5372 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
5373 }
5374
emitCommonOMPTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,const RegionCodeGenTy & CodeGen)5375 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
5376 const OMPExecutableDirective &S,
5377 const RegionCodeGenTy &CodeGen) {
5378 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
5379 CodeGenModule &CGM = CGF.CGM;
5380
5381 // On device emit this construct as inlined code.
5382 if (CGM.getLangOpts().OpenMPIsDevice) {
5383 OMPLexicalScope Scope(CGF, S, OMPD_target);
5384 CGM.getOpenMPRuntime().emitInlinedDirective(
5385 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5386 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5387 });
5388 return;
5389 }
5390
5391 auto LPCRegion =
5392 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
5393 llvm::Function *Fn = nullptr;
5394 llvm::Constant *FnID = nullptr;
5395
5396 const Expr *IfCond = nullptr;
5397 // Check for the at most one if clause associated with the target region.
5398 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5399 if (C->getNameModifier() == OMPD_unknown ||
5400 C->getNameModifier() == OMPD_target) {
5401 IfCond = C->getCondition();
5402 break;
5403 }
5404 }
5405
5406 // Check if we have any device clause associated with the directive.
5407 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
5408 nullptr, OMPC_DEVICE_unknown);
5409 if (auto *C = S.getSingleClause<OMPDeviceClause>())
5410 Device.setPointerAndInt(C->getDevice(), C->getModifier());
5411
5412 // Check if we have an if clause whose conditional always evaluates to false
5413 // or if we do not have any targets specified. If so the target region is not
5414 // an offload entry point.
5415 bool IsOffloadEntry = true;
5416 if (IfCond) {
5417 bool Val;
5418 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
5419 IsOffloadEntry = false;
5420 }
5421 if (CGM.getLangOpts().OMPTargetTriples.empty())
5422 IsOffloadEntry = false;
5423
5424 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
5425 StringRef ParentName;
5426 // In case we have Ctors/Dtors we use the complete type variant to produce
5427 // the mangling of the device outlined kernel.
5428 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
5429 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
5430 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
5431 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
5432 else
5433 ParentName =
5434 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
5435
5436 // Emit target region as a standalone region.
5437 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
5438 IsOffloadEntry, CodeGen);
5439 OMPLexicalScope Scope(CGF, S, OMPD_task);
5440 auto &&SizeEmitter =
5441 [IsOffloadEntry](CodeGenFunction &CGF,
5442 const OMPLoopDirective &D) -> llvm::Value * {
5443 if (IsOffloadEntry) {
5444 OMPLoopScope(CGF, D);
5445 // Emit calculation of the iterations count.
5446 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
5447 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
5448 /*isSigned=*/false);
5449 return NumIterations;
5450 }
5451 return nullptr;
5452 };
5453 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
5454 SizeEmitter);
5455 }
5456
emitTargetRegion(CodeGenFunction & CGF,const OMPTargetDirective & S,PrePostActionTy & Action)5457 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
5458 PrePostActionTy &Action) {
5459 Action.Enter(CGF);
5460 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5461 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5462 CGF.EmitOMPPrivateClause(S, PrivateScope);
5463 (void)PrivateScope.Privatize();
5464 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5465 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5466
5467 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
5468 }
5469
EmitOMPTargetDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetDirective & S)5470 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
5471 StringRef ParentName,
5472 const OMPTargetDirective &S) {
5473 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5474 emitTargetRegion(CGF, S, Action);
5475 };
5476 llvm::Function *Fn;
5477 llvm::Constant *Addr;
5478 // Emit target region as a standalone region.
5479 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5480 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5481 assert(Fn && Addr && "Target device function emission failed.");
5482 }
5483
EmitOMPTargetDirective(const OMPTargetDirective & S)5484 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
5485 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5486 emitTargetRegion(CGF, S, Action);
5487 };
5488 emitCommonOMPTargetDirective(*this, S, CodeGen);
5489 }
5490
emitCommonOMPTeamsDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,OpenMPDirectiveKind InnermostKind,const RegionCodeGenTy & CodeGen)5491 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
5492 const OMPExecutableDirective &S,
5493 OpenMPDirectiveKind InnermostKind,
5494 const RegionCodeGenTy &CodeGen) {
5495 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
5496 llvm::Function *OutlinedFn =
5497 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
5498 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
5499
5500 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
5501 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5502 if (NT || TL) {
5503 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
5504 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
5505
5506 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
5507 S.getBeginLoc());
5508 }
5509
5510 OMPTeamsScope Scope(CGF, S);
5511 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5512 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5513 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
5514 CapturedVars);
5515 }
5516
EmitOMPTeamsDirective(const OMPTeamsDirective & S)5517 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
5518 // Emit teams region as a standalone region.
5519 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5520 Action.Enter(CGF);
5521 OMPPrivateScope PrivateScope(CGF);
5522 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5523 CGF.EmitOMPPrivateClause(S, PrivateScope);
5524 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5525 (void)PrivateScope.Privatize();
5526 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
5527 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5528 };
5529 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5530 emitPostUpdateForReductionClause(*this, S,
5531 [](CodeGenFunction &) { return nullptr; });
5532 }
5533
emitTargetTeamsRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDirective & S)5534 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5535 const OMPTargetTeamsDirective &S) {
5536 auto *CS = S.getCapturedStmt(OMPD_teams);
5537 Action.Enter(CGF);
5538 // Emit teams region as a standalone region.
5539 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
5540 Action.Enter(CGF);
5541 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5542 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5543 CGF.EmitOMPPrivateClause(S, PrivateScope);
5544 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5545 (void)PrivateScope.Privatize();
5546 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5547 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
5548 CGF.EmitStmt(CS->getCapturedStmt());
5549 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5550 };
5551 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
5552 emitPostUpdateForReductionClause(CGF, S,
5553 [](CodeGenFunction &) { return nullptr; });
5554 }
5555
EmitOMPTargetTeamsDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDirective & S)5556 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
5557 CodeGenModule &CGM, StringRef ParentName,
5558 const OMPTargetTeamsDirective &S) {
5559 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5560 emitTargetTeamsRegion(CGF, Action, S);
5561 };
5562 llvm::Function *Fn;
5563 llvm::Constant *Addr;
5564 // Emit target region as a standalone region.
5565 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5566 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5567 assert(Fn && Addr && "Target device function emission failed.");
5568 }
5569
EmitOMPTargetTeamsDirective(const OMPTargetTeamsDirective & S)5570 void CodeGenFunction::EmitOMPTargetTeamsDirective(
5571 const OMPTargetTeamsDirective &S) {
5572 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5573 emitTargetTeamsRegion(CGF, Action, S);
5574 };
5575 emitCommonOMPTargetDirective(*this, S, CodeGen);
5576 }
5577
5578 static void
emitTargetTeamsDistributeRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDistributeDirective & S)5579 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
5580 const OMPTargetTeamsDistributeDirective &S) {
5581 Action.Enter(CGF);
5582 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5583 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5584 };
5585
5586 // Emit teams region as a standalone region.
5587 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5588 PrePostActionTy &Action) {
5589 Action.Enter(CGF);
5590 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5591 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5592 (void)PrivateScope.Privatize();
5593 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5594 CodeGenDistribute);
5595 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5596 };
5597 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
5598 emitPostUpdateForReductionClause(CGF, S,
5599 [](CodeGenFunction &) { return nullptr; });
5600 }
5601
EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeDirective & S)5602 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
5603 CodeGenModule &CGM, StringRef ParentName,
5604 const OMPTargetTeamsDistributeDirective &S) {
5605 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5606 emitTargetTeamsDistributeRegion(CGF, Action, S);
5607 };
5608 llvm::Function *Fn;
5609 llvm::Constant *Addr;
5610 // Emit target region as a standalone region.
5611 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5612 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5613 assert(Fn && Addr && "Target device function emission failed.");
5614 }
5615
EmitOMPTargetTeamsDistributeDirective(const OMPTargetTeamsDistributeDirective & S)5616 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
5617 const OMPTargetTeamsDistributeDirective &S) {
5618 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5619 emitTargetTeamsDistributeRegion(CGF, Action, S);
5620 };
5621 emitCommonOMPTargetDirective(*this, S, CodeGen);
5622 }
5623
emitTargetTeamsDistributeSimdRegion(CodeGenFunction & CGF,PrePostActionTy & Action,const OMPTargetTeamsDistributeSimdDirective & S)5624 static void emitTargetTeamsDistributeSimdRegion(
5625 CodeGenFunction &CGF, PrePostActionTy &Action,
5626 const OMPTargetTeamsDistributeSimdDirective &S) {
5627 Action.Enter(CGF);
5628 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5629 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5630 };
5631
5632 // Emit teams region as a standalone region.
5633 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5634 PrePostActionTy &Action) {
5635 Action.Enter(CGF);
5636 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5637 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5638 (void)PrivateScope.Privatize();
5639 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5640 CodeGenDistribute);
5641 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5642 };
5643 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
5644 emitPostUpdateForReductionClause(CGF, S,
5645 [](CodeGenFunction &) { return nullptr; });
5646 }
5647
EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeSimdDirective & S)5648 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
5649 CodeGenModule &CGM, StringRef ParentName,
5650 const OMPTargetTeamsDistributeSimdDirective &S) {
5651 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5652 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5653 };
5654 llvm::Function *Fn;
5655 llvm::Constant *Addr;
5656 // Emit target region as a standalone region.
5657 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5658 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5659 assert(Fn && Addr && "Target device function emission failed.");
5660 }
5661
EmitOMPTargetTeamsDistributeSimdDirective(const OMPTargetTeamsDistributeSimdDirective & S)5662 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
5663 const OMPTargetTeamsDistributeSimdDirective &S) {
5664 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5665 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
5666 };
5667 emitCommonOMPTargetDirective(*this, S, CodeGen);
5668 }
5669
EmitOMPTeamsDistributeDirective(const OMPTeamsDistributeDirective & S)5670 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
5671 const OMPTeamsDistributeDirective &S) {
5672
5673 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5674 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5675 };
5676
5677 // Emit teams region as a standalone region.
5678 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5679 PrePostActionTy &Action) {
5680 Action.Enter(CGF);
5681 OMPPrivateScope PrivateScope(CGF);
5682 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5683 (void)PrivateScope.Privatize();
5684 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5685 CodeGenDistribute);
5686 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5687 };
5688 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
5689 emitPostUpdateForReductionClause(*this, S,
5690 [](CodeGenFunction &) { return nullptr; });
5691 }
5692
EmitOMPTeamsDistributeSimdDirective(const OMPTeamsDistributeSimdDirective & S)5693 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
5694 const OMPTeamsDistributeSimdDirective &S) {
5695 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5696 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5697 };
5698
5699 // Emit teams region as a standalone region.
5700 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5701 PrePostActionTy &Action) {
5702 Action.Enter(CGF);
5703 OMPPrivateScope PrivateScope(CGF);
5704 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5705 (void)PrivateScope.Privatize();
5706 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
5707 CodeGenDistribute);
5708 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5709 };
5710 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
5711 emitPostUpdateForReductionClause(*this, S,
5712 [](CodeGenFunction &) { return nullptr; });
5713 }
5714
EmitOMPTeamsDistributeParallelForDirective(const OMPTeamsDistributeParallelForDirective & S)5715 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
5716 const OMPTeamsDistributeParallelForDirective &S) {
5717 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5718 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5719 S.getDistInc());
5720 };
5721
5722 // Emit teams region as a standalone region.
5723 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5724 PrePostActionTy &Action) {
5725 Action.Enter(CGF);
5726 OMPPrivateScope PrivateScope(CGF);
5727 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5728 (void)PrivateScope.Privatize();
5729 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
5730 CodeGenDistribute);
5731 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5732 };
5733 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
5734 emitPostUpdateForReductionClause(*this, S,
5735 [](CodeGenFunction &) { return nullptr; });
5736 }
5737
EmitOMPTeamsDistributeParallelForSimdDirective(const OMPTeamsDistributeParallelForSimdDirective & S)5738 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
5739 const OMPTeamsDistributeParallelForSimdDirective &S) {
5740 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5741 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5742 S.getDistInc());
5743 };
5744
5745 // Emit teams region as a standalone region.
5746 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5747 PrePostActionTy &Action) {
5748 Action.Enter(CGF);
5749 OMPPrivateScope PrivateScope(CGF);
5750 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5751 (void)PrivateScope.Privatize();
5752 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5753 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5754 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5755 };
5756 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
5757 CodeGen);
5758 emitPostUpdateForReductionClause(*this, S,
5759 [](CodeGenFunction &) { return nullptr; });
5760 }
5761
emitTargetTeamsDistributeParallelForRegion(CodeGenFunction & CGF,const OMPTargetTeamsDistributeParallelForDirective & S,PrePostActionTy & Action)5762 static void emitTargetTeamsDistributeParallelForRegion(
5763 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
5764 PrePostActionTy &Action) {
5765 Action.Enter(CGF);
5766 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5767 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5768 S.getDistInc());
5769 };
5770
5771 // Emit teams region as a standalone region.
5772 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5773 PrePostActionTy &Action) {
5774 Action.Enter(CGF);
5775 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5776 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5777 (void)PrivateScope.Privatize();
5778 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5779 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5780 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5781 };
5782
5783 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
5784 CodeGenTeams);
5785 emitPostUpdateForReductionClause(CGF, S,
5786 [](CodeGenFunction &) { return nullptr; });
5787 }
5788
EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForDirective & S)5789 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
5790 CodeGenModule &CGM, StringRef ParentName,
5791 const OMPTargetTeamsDistributeParallelForDirective &S) {
5792 // Emit SPMD target teams distribute parallel for region as a standalone
5793 // region.
5794 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5795 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5796 };
5797 llvm::Function *Fn;
5798 llvm::Constant *Addr;
5799 // Emit target region as a standalone region.
5800 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5801 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5802 assert(Fn && Addr && "Target device function emission failed.");
5803 }
5804
EmitOMPTargetTeamsDistributeParallelForDirective(const OMPTargetTeamsDistributeParallelForDirective & S)5805 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
5806 const OMPTargetTeamsDistributeParallelForDirective &S) {
5807 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5808 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
5809 };
5810 emitCommonOMPTargetDirective(*this, S, CodeGen);
5811 }
5812
emitTargetTeamsDistributeParallelForSimdRegion(CodeGenFunction & CGF,const OMPTargetTeamsDistributeParallelForSimdDirective & S,PrePostActionTy & Action)5813 static void emitTargetTeamsDistributeParallelForSimdRegion(
5814 CodeGenFunction &CGF,
5815 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
5816 PrePostActionTy &Action) {
5817 Action.Enter(CGF);
5818 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5819 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
5820 S.getDistInc());
5821 };
5822
5823 // Emit teams region as a standalone region.
5824 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
5825 PrePostActionTy &Action) {
5826 Action.Enter(CGF);
5827 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5828 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5829 (void)PrivateScope.Privatize();
5830 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
5831 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
5832 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
5833 };
5834
5835 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
5836 CodeGenTeams);
5837 emitPostUpdateForReductionClause(CGF, S,
5838 [](CodeGenFunction &) { return nullptr; });
5839 }
5840
EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForSimdDirective & S)5841 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
5842 CodeGenModule &CGM, StringRef ParentName,
5843 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5844 // Emit SPMD target teams distribute parallel for simd region as a standalone
5845 // region.
5846 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5847 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5848 };
5849 llvm::Function *Fn;
5850 llvm::Constant *Addr;
5851 // Emit target region as a standalone region.
5852 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
5853 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
5854 assert(Fn && Addr && "Target device function emission failed.");
5855 }
5856
EmitOMPTargetTeamsDistributeParallelForSimdDirective(const OMPTargetTeamsDistributeParallelForSimdDirective & S)5857 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
5858 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
5859 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5860 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
5861 };
5862 emitCommonOMPTargetDirective(*this, S, CodeGen);
5863 }
5864
EmitOMPCancellationPointDirective(const OMPCancellationPointDirective & S)5865 void CodeGenFunction::EmitOMPCancellationPointDirective(
5866 const OMPCancellationPointDirective &S) {
5867 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
5868 S.getCancelRegion());
5869 }
5870
EmitOMPCancelDirective(const OMPCancelDirective & S)5871 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
5872 const Expr *IfCond = nullptr;
5873 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5874 if (C->getNameModifier() == OMPD_unknown ||
5875 C->getNameModifier() == OMPD_cancel) {
5876 IfCond = C->getCondition();
5877 break;
5878 }
5879 }
5880 if (CGM.getLangOpts().OpenMPIRBuilder) {
5881 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5882 // TODO: This check is necessary as we only generate `omp parallel` through
5883 // the OpenMPIRBuilder for now.
5884 if (S.getCancelRegion() == OMPD_parallel) {
5885 llvm::Value *IfCondition = nullptr;
5886 if (IfCond)
5887 IfCondition = EmitScalarExpr(IfCond,
5888 /*IgnoreResultAssign=*/true);
5889 return Builder.restoreIP(
5890 OMPBuilder.CreateCancel(Builder, IfCondition, S.getCancelRegion()));
5891 }
5892 }
5893
5894 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
5895 S.getCancelRegion());
5896 }
5897
5898 CodeGenFunction::JumpDest
getOMPCancelDestination(OpenMPDirectiveKind Kind)5899 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
5900 if (Kind == OMPD_parallel || Kind == OMPD_task ||
5901 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
5902 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
5903 return ReturnBlock;
5904 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
5905 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
5906 Kind == OMPD_distribute_parallel_for ||
5907 Kind == OMPD_target_parallel_for ||
5908 Kind == OMPD_teams_distribute_parallel_for ||
5909 Kind == OMPD_target_teams_distribute_parallel_for);
5910 return OMPCancelStack.getExitBlock();
5911 }
5912
EmitOMPUseDevicePtrClause(const OMPUseDevicePtrClause & C,OMPPrivateScope & PrivateScope,const llvm::DenseMap<const ValueDecl *,Address> & CaptureDeviceAddrMap)5913 void CodeGenFunction::EmitOMPUseDevicePtrClause(
5914 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
5915 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
5916 auto OrigVarIt = C.varlist_begin();
5917 auto InitIt = C.inits().begin();
5918 for (const Expr *PvtVarIt : C.private_copies()) {
5919 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
5920 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
5921 const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
5922
5923 // In order to identify the right initializer we need to match the
5924 // declaration used by the mapping logic. In some cases we may get
5925 // OMPCapturedExprDecl that refers to the original declaration.
5926 const ValueDecl *MatchingVD = OrigVD;
5927 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
5928 // OMPCapturedExprDecl are used to privative fields of the current
5929 // structure.
5930 const auto *ME = cast<MemberExpr>(OED->getInit());
5931 assert(isa<CXXThisExpr>(ME->getBase()) &&
5932 "Base should be the current struct!");
5933 MatchingVD = ME->getMemberDecl();
5934 }
5935
5936 // If we don't have information about the current list item, move on to
5937 // the next one.
5938 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
5939 if (InitAddrIt == CaptureDeviceAddrMap.end())
5940 continue;
5941
5942 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
5943 InitAddrIt, InitVD,
5944 PvtVD]() {
5945 // Initialize the temporary initialization variable with the address we
5946 // get from the runtime library. We have to cast the source address
5947 // because it is always a void *. References are materialized in the
5948 // privatization scope, so the initialization here disregards the fact
5949 // the original variable is a reference.
5950 QualType AddrQTy =
5951 getContext().getPointerType(OrigVD->getType().getNonReferenceType());
5952 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
5953 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
5954 setAddrOfLocalVar(InitVD, InitAddr);
5955
5956 // Emit private declaration, it will be initialized by the value we
5957 // declaration we just added to the local declarations map.
5958 EmitDecl(*PvtVD);
5959
5960 // The initialization variables reached its purpose in the emission
5961 // of the previous declaration, so we don't need it anymore.
5962 LocalDeclMap.erase(InitVD);
5963
5964 // Return the address of the private variable.
5965 return GetAddrOfLocalVar(PvtVD);
5966 });
5967 assert(IsRegistered && "firstprivate var already registered as private");
5968 // Silence the warning about unused variable.
5969 (void)IsRegistered;
5970
5971 ++OrigVarIt;
5972 ++InitIt;
5973 }
5974 }
5975
getBaseDecl(const Expr * Ref)5976 static const VarDecl *getBaseDecl(const Expr *Ref) {
5977 const Expr *Base = Ref->IgnoreParenImpCasts();
5978 while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
5979 Base = OASE->getBase()->IgnoreParenImpCasts();
5980 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
5981 Base = ASE->getBase()->IgnoreParenImpCasts();
5982 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
5983 }
5984
EmitOMPUseDeviceAddrClause(const OMPUseDeviceAddrClause & C,OMPPrivateScope & PrivateScope,const llvm::DenseMap<const ValueDecl *,Address> & CaptureDeviceAddrMap)5985 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
5986 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
5987 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
5988 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
5989 for (const Expr *Ref : C.varlists()) {
5990 const VarDecl *OrigVD = getBaseDecl(Ref);
5991 if (!Processed.insert(OrigVD).second)
5992 continue;
5993 // In order to identify the right initializer we need to match the
5994 // declaration used by the mapping logic. In some cases we may get
5995 // OMPCapturedExprDecl that refers to the original declaration.
5996 const ValueDecl *MatchingVD = OrigVD;
5997 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
5998 // OMPCapturedExprDecl are used to privative fields of the current
5999 // structure.
6000 const auto *ME = cast<MemberExpr>(OED->getInit());
6001 assert(isa<CXXThisExpr>(ME->getBase()) &&
6002 "Base should be the current struct!");
6003 MatchingVD = ME->getMemberDecl();
6004 }
6005
6006 // If we don't have information about the current list item, move on to
6007 // the next one.
6008 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6009 if (InitAddrIt == CaptureDeviceAddrMap.end())
6010 continue;
6011
6012 Address PrivAddr = InitAddrIt->getSecond();
6013 // For declrefs and variable length array need to load the pointer for
6014 // correct mapping, since the pointer to the data was passed to the runtime.
6015 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
6016 MatchingVD->getType()->isArrayType())
6017 PrivAddr =
6018 EmitLoadOfPointer(PrivAddr, getContext()
6019 .getPointerType(OrigVD->getType())
6020 ->castAs<PointerType>());
6021 llvm::Type *RealTy =
6022 ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
6023 ->getPointerTo();
6024 PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
6025
6026 (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
6027 }
6028 }
6029
6030 // Generate the instructions for '#pragma omp target data' directive.
EmitOMPTargetDataDirective(const OMPTargetDataDirective & S)6031 void CodeGenFunction::EmitOMPTargetDataDirective(
6032 const OMPTargetDataDirective &S) {
6033 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
6034
6035 // Create a pre/post action to signal the privatization of the device pointer.
6036 // This action can be replaced by the OpenMP runtime code generation to
6037 // deactivate privatization.
6038 bool PrivatizeDevicePointers = false;
6039 class DevicePointerPrivActionTy : public PrePostActionTy {
6040 bool &PrivatizeDevicePointers;
6041
6042 public:
6043 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
6044 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
6045 void Enter(CodeGenFunction &CGF) override {
6046 PrivatizeDevicePointers = true;
6047 }
6048 };
6049 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
6050
6051 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
6052 CodeGenFunction &CGF, PrePostActionTy &Action) {
6053 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6054 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6055 };
6056
6057 // Codegen that selects whether to generate the privatization code or not.
6058 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
6059 &InnermostCodeGen](CodeGenFunction &CGF,
6060 PrePostActionTy &Action) {
6061 RegionCodeGenTy RCG(InnermostCodeGen);
6062 PrivatizeDevicePointers = false;
6063
6064 // Call the pre-action to change the status of PrivatizeDevicePointers if
6065 // needed.
6066 Action.Enter(CGF);
6067
6068 if (PrivatizeDevicePointers) {
6069 OMPPrivateScope PrivateScope(CGF);
6070 // Emit all instances of the use_device_ptr clause.
6071 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
6072 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
6073 Info.CaptureDeviceAddrMap);
6074 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
6075 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
6076 Info.CaptureDeviceAddrMap);
6077 (void)PrivateScope.Privatize();
6078 RCG(CGF);
6079 } else {
6080 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6081 RCG(CGF);
6082 }
6083 };
6084
6085 // Forward the provided action to the privatization codegen.
6086 RegionCodeGenTy PrivRCG(PrivCodeGen);
6087 PrivRCG.setAction(Action);
6088
6089 // Notwithstanding the body of the region is emitted as inlined directive,
6090 // we don't use an inline scope as changes in the references inside the
6091 // region are expected to be visible outside, so we do not privative them.
6092 OMPLexicalScope Scope(CGF, S);
6093 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
6094 PrivRCG);
6095 };
6096
6097 RegionCodeGenTy RCG(CodeGen);
6098
6099 // If we don't have target devices, don't bother emitting the data mapping
6100 // code.
6101 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
6102 RCG(*this);
6103 return;
6104 }
6105
6106 // Check if we have any if clause associated with the directive.
6107 const Expr *IfCond = nullptr;
6108 if (const auto *C = S.getSingleClause<OMPIfClause>())
6109 IfCond = C->getCondition();
6110
6111 // Check if we have any device clause associated with the directive.
6112 const Expr *Device = nullptr;
6113 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6114 Device = C->getDevice();
6115
6116 // Set the action to signal privatization of device pointers.
6117 RCG.setAction(PrivAction);
6118
6119 // Emit region code.
6120 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
6121 Info);
6122 }
6123
EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective & S)6124 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
6125 const OMPTargetEnterDataDirective &S) {
6126 // If we don't have target devices, don't bother emitting the data mapping
6127 // code.
6128 if (CGM.getLangOpts().OMPTargetTriples.empty())
6129 return;
6130
6131 // Check if we have any if clause associated with the directive.
6132 const Expr *IfCond = nullptr;
6133 if (const auto *C = S.getSingleClause<OMPIfClause>())
6134 IfCond = C->getCondition();
6135
6136 // Check if we have any device clause associated with the directive.
6137 const Expr *Device = nullptr;
6138 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6139 Device = C->getDevice();
6140
6141 OMPLexicalScope Scope(*this, S, OMPD_task);
6142 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6143 }
6144
EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective & S)6145 void CodeGenFunction::EmitOMPTargetExitDataDirective(
6146 const OMPTargetExitDataDirective &S) {
6147 // If we don't have target devices, don't bother emitting the data mapping
6148 // code.
6149 if (CGM.getLangOpts().OMPTargetTriples.empty())
6150 return;
6151
6152 // Check if we have any if clause associated with the directive.
6153 const Expr *IfCond = nullptr;
6154 if (const auto *C = S.getSingleClause<OMPIfClause>())
6155 IfCond = C->getCondition();
6156
6157 // Check if we have any device clause associated with the directive.
6158 const Expr *Device = nullptr;
6159 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6160 Device = C->getDevice();
6161
6162 OMPLexicalScope Scope(*this, S, OMPD_task);
6163 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6164 }
6165
emitTargetParallelRegion(CodeGenFunction & CGF,const OMPTargetParallelDirective & S,PrePostActionTy & Action)6166 static void emitTargetParallelRegion(CodeGenFunction &CGF,
6167 const OMPTargetParallelDirective &S,
6168 PrePostActionTy &Action) {
6169 // Get the captured statement associated with the 'parallel' region.
6170 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
6171 Action.Enter(CGF);
6172 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6173 Action.Enter(CGF);
6174 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6175 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6176 CGF.EmitOMPPrivateClause(S, PrivateScope);
6177 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6178 (void)PrivateScope.Privatize();
6179 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6180 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6181 // TODO: Add support for clauses.
6182 CGF.EmitStmt(CS->getCapturedStmt());
6183 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
6184 };
6185 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
6186 emitEmptyBoundParameters);
6187 emitPostUpdateForReductionClause(CGF, S,
6188 [](CodeGenFunction &) { return nullptr; });
6189 }
6190
EmitOMPTargetParallelDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelDirective & S)6191 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
6192 CodeGenModule &CGM, StringRef ParentName,
6193 const OMPTargetParallelDirective &S) {
6194 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6195 emitTargetParallelRegion(CGF, S, Action);
6196 };
6197 llvm::Function *Fn;
6198 llvm::Constant *Addr;
6199 // Emit target region as a standalone region.
6200 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6201 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6202 assert(Fn && Addr && "Target device function emission failed.");
6203 }
6204
EmitOMPTargetParallelDirective(const OMPTargetParallelDirective & S)6205 void CodeGenFunction::EmitOMPTargetParallelDirective(
6206 const OMPTargetParallelDirective &S) {
6207 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6208 emitTargetParallelRegion(CGF, S, Action);
6209 };
6210 emitCommonOMPTargetDirective(*this, S, CodeGen);
6211 }
6212
emitTargetParallelForRegion(CodeGenFunction & CGF,const OMPTargetParallelForDirective & S,PrePostActionTy & Action)6213 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
6214 const OMPTargetParallelForDirective &S,
6215 PrePostActionTy &Action) {
6216 Action.Enter(CGF);
6217 // Emit directive as a combined directive that consists of two implicit
6218 // directives: 'parallel' with 'for' directive.
6219 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6220 Action.Enter(CGF);
6221 CodeGenFunction::OMPCancelStackRAII CancelRegion(
6222 CGF, OMPD_target_parallel_for, S.hasCancel());
6223 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6224 emitDispatchForLoopBounds);
6225 };
6226 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
6227 emitEmptyBoundParameters);
6228 }
6229
EmitOMPTargetParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForDirective & S)6230 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
6231 CodeGenModule &CGM, StringRef ParentName,
6232 const OMPTargetParallelForDirective &S) {
6233 // Emit SPMD target parallel for region as a standalone region.
6234 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6235 emitTargetParallelForRegion(CGF, S, Action);
6236 };
6237 llvm::Function *Fn;
6238 llvm::Constant *Addr;
6239 // Emit target region as a standalone region.
6240 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6241 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6242 assert(Fn && Addr && "Target device function emission failed.");
6243 }
6244
EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective & S)6245 void CodeGenFunction::EmitOMPTargetParallelForDirective(
6246 const OMPTargetParallelForDirective &S) {
6247 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6248 emitTargetParallelForRegion(CGF, S, Action);
6249 };
6250 emitCommonOMPTargetDirective(*this, S, CodeGen);
6251 }
6252
6253 static void
emitTargetParallelForSimdRegion(CodeGenFunction & CGF,const OMPTargetParallelForSimdDirective & S,PrePostActionTy & Action)6254 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
6255 const OMPTargetParallelForSimdDirective &S,
6256 PrePostActionTy &Action) {
6257 Action.Enter(CGF);
6258 // Emit directive as a combined directive that consists of two implicit
6259 // directives: 'parallel' with 'for' directive.
6260 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6261 Action.Enter(CGF);
6262 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
6263 emitDispatchForLoopBounds);
6264 };
6265 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
6266 emitEmptyBoundParameters);
6267 }
6268
EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForSimdDirective & S)6269 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
6270 CodeGenModule &CGM, StringRef ParentName,
6271 const OMPTargetParallelForSimdDirective &S) {
6272 // Emit SPMD target parallel for region as a standalone region.
6273 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6274 emitTargetParallelForSimdRegion(CGF, S, Action);
6275 };
6276 llvm::Function *Fn;
6277 llvm::Constant *Addr;
6278 // Emit target region as a standalone region.
6279 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6280 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6281 assert(Fn && Addr && "Target device function emission failed.");
6282 }
6283
EmitOMPTargetParallelForSimdDirective(const OMPTargetParallelForSimdDirective & S)6284 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
6285 const OMPTargetParallelForSimdDirective &S) {
6286 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6287 emitTargetParallelForSimdRegion(CGF, S, Action);
6288 };
6289 emitCommonOMPTargetDirective(*this, S, CodeGen);
6290 }
6291
6292 /// Emit a helper variable and return corresponding lvalue.
mapParam(CodeGenFunction & CGF,const DeclRefExpr * Helper,const ImplicitParamDecl * PVD,CodeGenFunction::OMPPrivateScope & Privates)6293 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
6294 const ImplicitParamDecl *PVD,
6295 CodeGenFunction::OMPPrivateScope &Privates) {
6296 const auto *VDecl = cast<VarDecl>(Helper->getDecl());
6297 Privates.addPrivate(VDecl,
6298 [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
6299 }
6300
EmitOMPTaskLoopBasedDirective(const OMPLoopDirective & S)6301 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
6302 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
6303 // Emit outlined function for task construct.
6304 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
6305 Address CapturedStruct = Address::invalid();
6306 {
6307 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
6308 CapturedStruct = GenerateCapturedStmtArgument(*CS);
6309 }
6310 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
6311 const Expr *IfCond = nullptr;
6312 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6313 if (C->getNameModifier() == OMPD_unknown ||
6314 C->getNameModifier() == OMPD_taskloop) {
6315 IfCond = C->getCondition();
6316 break;
6317 }
6318 }
6319
6320 OMPTaskDataTy Data;
6321 // Check if taskloop must be emitted without taskgroup.
6322 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
6323 // TODO: Check if we should emit tied or untied task.
6324 Data.Tied = true;
6325 // Set scheduling for taskloop
6326 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
6327 // grainsize clause
6328 Data.Schedule.setInt(/*IntVal=*/false);
6329 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
6330 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
6331 // num_tasks clause
6332 Data.Schedule.setInt(/*IntVal=*/true);
6333 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
6334 }
6335
6336 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
6337 // if (PreCond) {
6338 // for (IV in 0..LastIteration) BODY;
6339 // <Final counter/linear vars updates>;
6340 // }
6341 //
6342
6343 // Emit: if (PreCond) - begin.
6344 // If the condition constant folds and can be elided, avoid emitting the
6345 // whole loop.
6346 bool CondConstant;
6347 llvm::BasicBlock *ContBlock = nullptr;
6348 OMPLoopScope PreInitScope(CGF, S);
6349 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
6350 if (!CondConstant)
6351 return;
6352 } else {
6353 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
6354 ContBlock = CGF.createBasicBlock("taskloop.if.end");
6355 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
6356 CGF.getProfileCount(&S));
6357 CGF.EmitBlock(ThenBlock);
6358 CGF.incrementProfileCounter(&S);
6359 }
6360
6361 (void)CGF.EmitOMPLinearClauseInit(S);
6362
6363 OMPPrivateScope LoopScope(CGF);
6364 // Emit helper vars inits.
6365 enum { LowerBound = 5, UpperBound, Stride, LastIter };
6366 auto *I = CS->getCapturedDecl()->param_begin();
6367 auto *LBP = std::next(I, LowerBound);
6368 auto *UBP = std::next(I, UpperBound);
6369 auto *STP = std::next(I, Stride);
6370 auto *LIP = std::next(I, LastIter);
6371 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
6372 LoopScope);
6373 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
6374 LoopScope);
6375 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
6376 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
6377 LoopScope);
6378 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
6379 CGF.EmitOMPLinearClause(S, LoopScope);
6380 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
6381 (void)LoopScope.Privatize();
6382 // Emit the loop iteration variable.
6383 const Expr *IVExpr = S.getIterationVariable();
6384 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
6385 CGF.EmitVarDecl(*IVDecl);
6386 CGF.EmitIgnoredExpr(S.getInit());
6387
6388 // Emit the iterations count variable.
6389 // If it is not a variable, Sema decided to calculate iterations count on
6390 // each iteration (e.g., it is foldable into a constant).
6391 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
6392 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
6393 // Emit calculation of the iterations count.
6394 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
6395 }
6396
6397 {
6398 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
6399 emitCommonSimdLoop(
6400 CGF, S,
6401 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6402 if (isOpenMPSimdDirective(S.getDirectiveKind()))
6403 CGF.EmitOMPSimdInit(S);
6404 },
6405 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
6406 CGF.EmitOMPInnerLoop(
6407 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
6408 [&S](CodeGenFunction &CGF) {
6409 emitOMPLoopBodyWithStopPoint(CGF, S,
6410 CodeGenFunction::JumpDest());
6411 },
6412 [](CodeGenFunction &) {});
6413 });
6414 }
6415 // Emit: if (PreCond) - end.
6416 if (ContBlock) {
6417 CGF.EmitBranch(ContBlock);
6418 CGF.EmitBlock(ContBlock, true);
6419 }
6420 // Emit final copy of the lastprivate variables if IsLastIter != 0.
6421 if (HasLastprivateClause) {
6422 CGF.EmitOMPLastprivateClauseFinal(
6423 S, isOpenMPSimdDirective(S.getDirectiveKind()),
6424 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
6425 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
6426 (*LIP)->getType(), S.getBeginLoc())));
6427 }
6428 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
6429 return CGF.Builder.CreateIsNotNull(
6430 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
6431 (*LIP)->getType(), S.getBeginLoc()));
6432 });
6433 };
6434 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
6435 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
6436 const OMPTaskDataTy &Data) {
6437 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
6438 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
6439 OMPLoopScope PreInitScope(CGF, S);
6440 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
6441 OutlinedFn, SharedsTy,
6442 CapturedStruct, IfCond, Data);
6443 };
6444 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
6445 CodeGen);
6446 };
6447 if (Data.Nogroup) {
6448 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
6449 } else {
6450 CGM.getOpenMPRuntime().emitTaskgroupRegion(
6451 *this,
6452 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
6453 PrePostActionTy &Action) {
6454 Action.Enter(CGF);
6455 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
6456 Data);
6457 },
6458 S.getBeginLoc());
6459 }
6460 }
6461
EmitOMPTaskLoopDirective(const OMPTaskLoopDirective & S)6462 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
6463 auto LPCRegion =
6464 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6465 EmitOMPTaskLoopBasedDirective(S);
6466 }
6467
EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective & S)6468 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
6469 const OMPTaskLoopSimdDirective &S) {
6470 auto LPCRegion =
6471 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6472 OMPLexicalScope Scope(*this, S);
6473 EmitOMPTaskLoopBasedDirective(S);
6474 }
6475
EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective & S)6476 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
6477 const OMPMasterTaskLoopDirective &S) {
6478 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6479 Action.Enter(CGF);
6480 EmitOMPTaskLoopBasedDirective(S);
6481 };
6482 auto LPCRegion =
6483 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6484 OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
6485 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6486 }
6487
EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective & S)6488 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
6489 const OMPMasterTaskLoopSimdDirective &S) {
6490 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6491 Action.Enter(CGF);
6492 EmitOMPTaskLoopBasedDirective(S);
6493 };
6494 auto LPCRegion =
6495 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6496 OMPLexicalScope Scope(*this, S);
6497 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
6498 }
6499
EmitOMPParallelMasterTaskLoopDirective(const OMPParallelMasterTaskLoopDirective & S)6500 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
6501 const OMPParallelMasterTaskLoopDirective &S) {
6502 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6503 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6504 PrePostActionTy &Action) {
6505 Action.Enter(CGF);
6506 CGF.EmitOMPTaskLoopBasedDirective(S);
6507 };
6508 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6509 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6510 S.getBeginLoc());
6511 };
6512 auto LPCRegion =
6513 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6514 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
6515 emitEmptyBoundParameters);
6516 }
6517
EmitOMPParallelMasterTaskLoopSimdDirective(const OMPParallelMasterTaskLoopSimdDirective & S)6518 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
6519 const OMPParallelMasterTaskLoopSimdDirective &S) {
6520 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6521 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
6522 PrePostActionTy &Action) {
6523 Action.Enter(CGF);
6524 CGF.EmitOMPTaskLoopBasedDirective(S);
6525 };
6526 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
6527 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
6528 S.getBeginLoc());
6529 };
6530 auto LPCRegion =
6531 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
6532 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
6533 emitEmptyBoundParameters);
6534 }
6535
6536 // Generate the instructions for '#pragma omp target update' directive.
EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective & S)6537 void CodeGenFunction::EmitOMPTargetUpdateDirective(
6538 const OMPTargetUpdateDirective &S) {
6539 // If we don't have target devices, don't bother emitting the data mapping
6540 // code.
6541 if (CGM.getLangOpts().OMPTargetTriples.empty())
6542 return;
6543
6544 // Check if we have any if clause associated with the directive.
6545 const Expr *IfCond = nullptr;
6546 if (const auto *C = S.getSingleClause<OMPIfClause>())
6547 IfCond = C->getCondition();
6548
6549 // Check if we have any device clause associated with the directive.
6550 const Expr *Device = nullptr;
6551 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6552 Device = C->getDevice();
6553
6554 OMPLexicalScope Scope(*this, S, OMPD_task);
6555 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
6556 }
6557
EmitSimpleOMPExecutableDirective(const OMPExecutableDirective & D)6558 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
6559 const OMPExecutableDirective &D) {
6560 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
6561 EmitOMPScanDirective(*SD);
6562 return;
6563 }
6564 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
6565 return;
6566 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
6567 OMPPrivateScope GlobalsScope(CGF);
6568 if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
6569 // Capture global firstprivates to avoid crash.
6570 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
6571 for (const Expr *Ref : C->varlists()) {
6572 const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
6573 if (!DRE)
6574 continue;
6575 const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
6576 if (!VD || VD->hasLocalStorage())
6577 continue;
6578 if (!CGF.LocalDeclMap.count(VD)) {
6579 LValue GlobLVal = CGF.EmitLValue(Ref);
6580 GlobalsScope.addPrivate(
6581 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6582 }
6583 }
6584 }
6585 }
6586 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
6587 (void)GlobalsScope.Privatize();
6588 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
6589 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
6590 } else {
6591 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
6592 for (const Expr *E : LD->counters()) {
6593 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
6594 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
6595 LValue GlobLVal = CGF.EmitLValue(E);
6596 GlobalsScope.addPrivate(
6597 VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
6598 }
6599 if (isa<OMPCapturedExprDecl>(VD)) {
6600 // Emit only those that were not explicitly referenced in clauses.
6601 if (!CGF.LocalDeclMap.count(VD))
6602 CGF.EmitVarDecl(*VD);
6603 }
6604 }
6605 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
6606 if (!C->getNumForLoops())
6607 continue;
6608 for (unsigned I = LD->getCollapsedNumber(),
6609 E = C->getLoopNumIterations().size();
6610 I < E; ++I) {
6611 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
6612 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
6613 // Emit only those that were not explicitly referenced in clauses.
6614 if (!CGF.LocalDeclMap.count(VD))
6615 CGF.EmitVarDecl(*VD);
6616 }
6617 }
6618 }
6619 }
6620 (void)GlobalsScope.Privatize();
6621 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
6622 }
6623 };
6624 {
6625 auto LPCRegion =
6626 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
6627 OMPSimdLexicalScope Scope(*this, D);
6628 CGM.getOpenMPRuntime().emitInlinedDirective(
6629 *this,
6630 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
6631 : D.getDirectiveKind(),
6632 CodeGen);
6633 }
6634 // Check for outer lastprivate conditional update.
6635 checkForLastprivateConditionalUpdate(*this, D);
6636 }
6637