1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/BinaryFormat/Dwarf.h"
29 #include "llvm/Frontend/OpenMP/OMPConstants.h"
30 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DebugInfoMetadata.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Metadata.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include <optional>
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm::omp;
41 
42 static const VarDecl *getBaseDecl(const Expr *Ref);
43 
44 namespace {
45 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
46 /// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emit the pre-init declarations attached to the directive's clauses.
  /// Declarations carrying OMPCaptureNoInitAttr are allocated (with cleanups
  /// registered) but deliberately left without an emitted initializer.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate the variable and register its cleanups without
              // emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  // Privatization scope used to remap captured variables to their addresses
  // in the enclosing function when the directive is emitted inline.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the current lambda, captured
  /// statement, or enclosing block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  /// \param CapturedRegion If set, the captured region of \p S whose captured
  ///        variables must be remapped for inlined emission; if not set, only
  ///        the pre-init statements are (optionally) emitted.
  /// \param EmitPreInitStmt Whether to emit the clauses' pre-init statements.
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    // Remap each captured variable to the address it has in the enclosing
    // function so the inlined region body can use it directly.
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
105 
106 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
107 /// for captured expressions.
108 class OMPParallelScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)109   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
110     OpenMPDirectiveKind Kind = S.getDirectiveKind();
111     return !(isOpenMPTargetExecutionDirective(Kind) ||
112              isOpenMPLoopBoundSharingDirective(Kind)) &&
113            isOpenMPParallelDirective(Kind);
114   }
115 
116 public:
OMPParallelScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)117   OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
118       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
119                         EmitPreInitStmt(S)) {}
120 };
121 
122 /// Lexical scope for OpenMP teams construct, that handles correct codegen
123 /// for captured expressions.
124 class OMPTeamsScope final : public OMPLexicalScope {
EmitPreInitStmt(const OMPExecutableDirective & S)125   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
126     OpenMPDirectiveKind Kind = S.getDirectiveKind();
127     return !isOpenMPTargetExecutionDirective(Kind) &&
128            isOpenMPTeamsDirective(Kind);
129   }
130 
131 public:
OMPTeamsScope(CodeGenFunction & CGF,const OMPExecutableDirective & S)132   OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
133       : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
134                         EmitPreInitStmt(S)) {}
135 };
136 
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emit everything that must precede evaluation of the loop pre-condition:
  /// temporaries for the loop counters, undef placeholders for privatized
  /// variables referenced by the condition, helper variables of C++ range-for
  /// loops, and the directive's own pre-init declarations.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      // Remap each loop counter to a fresh temporary so evaluating the
      // pre-condition does not touch the original variables.
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          // Skip variables already handled as loop counters above.
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    // Undo the counter/private remapping; the temporaries were only needed
    // while emitting the declarations above.
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
202 
/// Lexical scope used when emitting OpenMP directives in simd-only mode:
/// emits clause pre-init declarations and remaps all captured variables so
/// the associated statement can be emitted inline.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  // Privatization scope used to remap captured variables to their addresses
  // in the enclosing function.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the current lambda, captured
  /// statement, or enclosing block.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate the variable and register its cleanups without
              // emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        // Emit the captured-expression helpers of use_device_ptr vars.
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        // Emit the captured-expression helpers of use_device_addr vars.
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    // Non-simd directives still need their private clause emitted here.
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    // Walk the (possibly nested) captured statements and remap every captured
    // variable to the address it has in the enclosing function.
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
283 
284 } // namespace
285 
286 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
287                                          const OMPExecutableDirective &S,
288                                          const RegionCodeGenTy &CodeGen);
289 
EmitOMPSharedLValue(const Expr * E)290 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
291   if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
292     if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
293       OrigVD = OrigVD->getCanonicalDecl();
294       bool IsCaptured =
295           LambdaCaptureFields.lookup(OrigVD) ||
296           (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
297           (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
298       DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
299                       OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
300       return EmitLValue(&DRE);
301     }
302   }
303   return EmitLValue(E);
304 }
305 
getTypeSize(QualType Ty)306 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
307   ASTContext &C = getContext();
308   llvm::Value *Size = nullptr;
309   auto SizeInChars = C.getTypeSizeInChars(Ty);
310   if (SizeInChars.isZero()) {
311     // getTypeSizeInChars() returns 0 for a VLA.
312     while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
313       VlaSizePair VlaSize = getVLASize(VAT);
314       Ty = VlaSize.Type;
315       Size =
316           Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
317     }
318     SizeInChars = C.getTypeSizeInChars(Ty);
319     if (SizeInChars.isZero())
320       return llvm::ConstantInt::get(SizeTy, /*V=*/0);
321     return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
322   }
323   return CGM.getSize(SizeInChars);
324 }
325 
/// Collect the values to pass to the outlined function for the captures of
/// \p S, in capture order: VLA sizes as previously computed size values,
/// 'this' as CXXThisValue, by-copy captures as loaded scalars (re-punned
/// through a uintptr temporary when the field is not a pointer), and
/// by-reference captures as addresses.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  // Walk capture inits, record fields and capture descriptors in lock-step.
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the VLA dimension's size value computed earlier in this function.
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}
371 
castValueFromUintptr(CodeGenFunction & CGF,SourceLocation Loc,QualType DstType,StringRef Name,LValue AddrLV)372 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
373                                     QualType DstType, StringRef Name,
374                                     LValue AddrLV) {
375   ASTContext &Ctx = CGF.getContext();
376 
377   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
378       AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
379       Ctx.getPointerType(DstType), Loc);
380   Address TmpAddr =
381       CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
382   return TmpAddr;
383 }
384 
getCanonicalParamType(ASTContext & C,QualType T)385 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
386   if (T->isLValueReferenceType())
387     return C.getLValueReferenceType(
388         getCanonicalParamType(C, T.getNonReferenceType()),
389         /*SpelledAsLValue=*/false);
390   if (T->isPointerType())
391     return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
392   if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
393     if (const auto *VLA = dyn_cast<VariableArrayType>(A))
394       return getCanonicalParamType(C, VLA->getElementType());
395     if (!A->isVariablyModifiedType())
396       return C.getCanonicalType(T);
397   }
398   return C.getCanonicalParamType(T);
399 }
400 
401 namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from  UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  // Note: RegisterCastedArgsOnly only takes effect when UIntPtrCastRequired
  // is also true; otherwise the ctor forces it to false.
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
423 } // namespace
424 
/// Create the outlined function for a captured statement and emit its
/// prologue: builds the argument list from the captured record's fields,
/// creates and starts the llvm::Function, and records in \p LocalAddrs /
/// \p VLASizes how each argument maps back to the captured entities.
/// Returns the newly created function; \p CXXThisValue is set if 'this'
/// is among the captures.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  // Copy the CapturedDecl's leading parameters (those before the context
  // parameter) verbatim into both argument lists.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  // For the debug (non-uintptr-cast) variant, create a synthetic function
  // declaration to own ParmVarDecls with proper source locations.
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  // Create one argument per captured record field, walking the captures in
  // lock-step with the fields.
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      // Thread-local captures get a dedicated implicit-parameter kind.
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      // Debug variant: real ParmVarDecls carrying the capture's locations.
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Append the CapturedDecl's trailing parameters (after the context param).
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  // Second pass over fields/captures: map each argument back to the entity
  // it carries (VLA size, captured variable, or 'this').
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // Recover the VLA size value from the (possibly uintptr-cast) argument.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      // By-reference capture: dereference down to the variable's address.
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
608 
/// Generate the outlined function for captured statement \p S. When debug
/// info is enabled (reduced or better), the body is emitted into a
/// "<name>_debug__" function with original parameter types, and a thin
/// wrapper with the runtime-expected (uintptr-cast) signature is returned
/// that forwards its arguments to the debug function.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  // The body function skips the uintptr casts only when it will be called
  // through the wrapper (which performs them instead).
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Remap captured variables to the prologue-computed addresses while the
  // body is emitted.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper: it carries the helper name the runtime expects,
  // registers only the casted arguments, and simply calls F.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  // Reload each wrapper argument in the form F expects and forward it.
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress(WrapperCGF).withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        // VLA sizes are forwarded as the already-loaded size values.
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
690 
691 //===----------------------------------------------------------------------===//
692 //                              OpenMP Directive Emission
693 //===----------------------------------------------------------------------===//
// Emits an IR loop that copies/initializes an array element-by-element:
// computes the flattened element count of \p OriginalType, then runs a
// PHI-based while loop invoking \p CopyGen on each (dest, src) element
// address pair. Used for arrays whose elements need non-trivial copying.
EmitOMPAggregateAssign(Address DestAddr,Address SrcAddr,QualType OriginalType,const llvm::function_ref<void (Address,Address)> CopyGen)694 void CodeGenFunction::EmitOMPAggregateAssign(
695     Address DestAddr, Address SrcAddr, QualType OriginalType,
696     const llvm::function_ref<void(Address, Address)> CopyGen) {
697   // Perform element-by-element initialization.
698   QualType ElementTy;
699 
700   // Drill down to the base element type on both arrays.
701   const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
702   llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // View the source through the destination's element type so both pointers
  // are stepped with identical GEPs below.
703   SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
704 
705   llvm::Value *SrcBegin = SrcAddr.getPointer();
706   llvm::Value *DestBegin = DestAddr.getPointer();
707   // Cast from pointer to array type to pointer to single element.
708   llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
709                                                    DestBegin, NumElements);
710 
711   // The basic structure here is a while-do loop.
712   llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
713   llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Branch straight to the done block when the array is empty (begin == end).
714   llvm::Value *IsEmpty =
715       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
716   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
717 
718   // Enter the loop body, making that address the current address.
719   llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
720   EmitBlock(BodyBB);
721 
722   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
723 
  // PHI nodes carry the current element pointers; the backedge incoming
  // values are filled in after the body is emitted (see end of the loop).
724   llvm::PHINode *SrcElementPHI =
725       Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
726   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
727   Address SrcElementCurrent =
728       Address(SrcElementPHI, SrcAddr.getElementType(),
729               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730 
731   llvm::PHINode *DestElementPHI = Builder.CreatePHI(
732       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
733   DestElementPHI->addIncoming(DestBegin, EntryBB);
734   Address DestElementCurrent =
735       Address(DestElementPHI, DestAddr.getElementType(),
736               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
737 
738   // Emit copy.
739   CopyGen(DestElementCurrent, SrcElementCurrent);
740 
741   // Shift the address forward by one element.
742   llvm::Value *DestElementNext =
743       Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
744                                  /*Idx0=*/1, "omp.arraycpy.dest.element");
745   llvm::Value *SrcElementNext =
746       Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
747                                  /*Idx0=*/1, "omp.arraycpy.src.element");
748   // Check whether we've reached the end.
749   llvm::Value *Done =
750       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
751   Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Use the *current* insert block for the backedge: CopyGen may have created
  // additional basic blocks inside the loop body.
752   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
753   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
754 
755   // Done.
756   EmitBlock(DoneBB, /*IsFinished=*/true);
757 }
758 
EmitOMPCopy(QualType OriginalType,Address DestAddr,Address SrcAddr,const VarDecl * DestVD,const VarDecl * SrcVD,const Expr * Copy)759 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
760                                   Address SrcAddr, const VarDecl *DestVD,
761                                   const VarDecl *SrcVD, const Expr *Copy) {
762   if (OriginalType->isArrayType()) {
763     const auto *BO = dyn_cast<BinaryOperator>(Copy);
764     if (BO && BO->getOpcode() == BO_Assign) {
765       // Perform simple memcpy for simple copying.
766       LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
767       LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
768       EmitAggregateAssign(Dest, Src, OriginalType);
769     } else {
770       // For arrays with complex element types perform element by element
771       // copying.
772       EmitOMPAggregateAssign(
773           DestAddr, SrcAddr, OriginalType,
774           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
775             // Working with the single array element, so have to remap
776             // destination and source variables to corresponding array
777             // elements.
778             CodeGenFunction::OMPPrivateScope Remap(*this);
779             Remap.addPrivate(DestVD, DestElement);
780             Remap.addPrivate(SrcVD, SrcElement);
781             (void)Remap.Privatize();
782             EmitIgnoredExpr(Copy);
783           });
784     }
785   } else {
786     // Remap pseudo source variable to private copy.
787     CodeGenFunction::OMPPrivateScope Remap(*this);
788     Remap.addPrivate(SrcVD, SrcAddr);
789     Remap.addPrivate(DestVD, DestAddr);
790     (void)Remap.Privatize();
791     // Emit copying of the whole variable.
792     EmitIgnoredExpr(Copy);
793   }
794 }
795 
// Emit private copies (with copy-initialization from the original variable)
// for all variables listed in 'firstprivate' clauses of directive \p D and
// register them in \p PrivateScope. Each variable is emitted at most once
// even if it appears in several clauses. Returns true iff at least one
// emitted firstprivate variable is also lastprivate on the same directive
// (callers use this to trigger the lastprivate finalization path).
EmitOMPFirstprivateClause(const OMPExecutableDirective & D,OMPPrivateScope & PrivateScope)796 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
797                                                 OMPPrivateScope &PrivateScope) {
798   if (!HaveInsertPoint())
799     return false;
800   bool DeviceConstTarget =
801       getLangOpts().OpenMPIsTargetDevice &&
802       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
803   bool FirstprivateIsLastprivate = false;
  // Map each lastprivate variable to its modifier so the loop below can
  // detect firstprivate+lastprivate combinations (incl. 'conditional').
  // NOTE(review): the inner loop variable 'D' shadows the directive
  // parameter 'D' — harmless here, but worth renaming.
804   llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
805   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
806     for (const auto *D : C->varlists())
807       Lastprivates.try_emplace(
808           cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
809           C->getKind());
810   }
811   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
812   llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
813   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
814   // Force emission of the firstprivate copy if the directive does not emit
815   // outlined function, like omp for, omp simd, omp distribute etc.
816   bool MustEmitFirstprivateCopy =
817       CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
818   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
819     const auto *IRef = C->varlist_begin();
820     const auto *InitsRef = C->inits().begin();
821     for (const Expr *IInit : C->private_copies()) {
822       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
823       bool ThisFirstprivateIsLastprivate =
824           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
825       const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
826       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // Skip the copy when the variable was captured by value (non-reference
      // field), is not lastprivate, is not custom-allocated, and the
      // directive produces an outlined function: the by-value capture
      // already acts as the private copy.
827       if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
828           !FD->getType()->isReferenceType() &&
829           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
830         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
831         ++IRef;
832         ++InitsRef;
833         continue;
834       }
835       // Do not emit copy for firstprivate constant variables in target regions,
836       // captured by reference.
837       if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
838           FD && FD->getType()->isReferenceType() &&
839           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
840         EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
841         ++IRef;
842         ++InitsRef;
843         continue;
844       }
845       FirstprivateIsLastprivate =
846           FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Only the first clause mentioning a variable emits the copy.
847       if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
848         const auto *VDInit =
849             cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
850         bool IsRegistered;
851         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
852                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
853                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
854         LValue OriginalLVal;
855         if (!FD) {
856           // Check if the firstprivate variable is just a constant value.
857           ConstantEmission CE = tryEmitAsConstant(&DRE);
858           if (CE && !CE.isReference()) {
859             // Constant value, no need to create a copy.
860             ++IRef;
861             ++InitsRef;
862             continue;
863           }
864           if (CE && CE.isReference()) {
865             OriginalLVal = CE.getReferenceLValue(*this, &DRE);
866           } else {
867             assert(!CE && "Expected non-constant firstprivate.");
868             OriginalLVal = EmitLValue(&DRE);
869           }
870         } else {
871           OriginalLVal = EmitLValue(&DRE);
872         }
873         QualType Type = VD->getType();
874         if (Type->isArrayType()) {
875           // Emit VarDecl with copy init for arrays.
876           // Get the address of the original variable captured in current
877           // captured region.
878           AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
879           const Expr *Init = VD->getInit();
880           if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
881             // Perform simple memcpy.
882             LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
883             EmitAggregateAssign(Dest, OriginalLVal, Type);
884           } else {
885             EmitOMPAggregateAssign(
886                 Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
887                 Type,
888                 [this, VDInit, Init](Address DestElement, Address SrcElement) {
889                   // Clean up any temporaries needed by the
890                   // initialization.
891                   RunCleanupsScope InitScope(*this);
892                   // Emit initialization for single element.
893                   setAddrOfLocalVar(VDInit, SrcElement);
894                   EmitAnyExprToMem(Init, DestElement,
895                                    Init->getType().getQualifiers(),
896                                    /*IsInitializer*/ false);
897                   LocalDeclMap.erase(VDInit);
898                 });
899           }
900           EmitAutoVarCleanups(Emission);
901           IsRegistered =
902               PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
903         } else {
904           Address OriginalAddr = OriginalLVal.getAddress(*this);
905           // Emit private VarDecl with copy init.
906           // Remap temp VDInit variable to the address of the original
907           // variable (for proper handling of captured global variables).
908           setAddrOfLocalVar(VDInit, OriginalAddr);
909           EmitDecl(*VD);
910           LocalDeclMap.erase(VDInit);
911           Address VDAddr = GetAddrOfLocalVar(VD);
912           if (ThisFirstprivateIsLastprivate &&
913               Lastprivates[OrigVD->getCanonicalDecl()] ==
914                   OMPC_LASTPRIVATE_conditional) {
915             // Create/init special variable for lastprivate conditionals.
916             llvm::Value *V =
917                 EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
918                                                 AlignmentSource::Decl),
919                                  (*IRef)->getExprLoc());
920             VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
921                 *this, OrigVD);
922             EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
923                                                 AlignmentSource::Decl));
924             LocalDeclMap.erase(VD);
925             setAddrOfLocalVar(VD, VDAddr);
926           }
927           IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
928         }
929         assert(IsRegistered &&
930                "firstprivate var already registered as private");
931         // Silence the warning about unused variable.
932         (void)IsRegistered;
933       }
934       ++IRef;
935       ++InitsRef;
936     }
937   }
  // True iff at least one variable was both firstprivate and lastprivate.
938   return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
939 }
940 
EmitOMPPrivateClause(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope)941 void CodeGenFunction::EmitOMPPrivateClause(
942     const OMPExecutableDirective &D,
943     CodeGenFunction::OMPPrivateScope &PrivateScope) {
944   if (!HaveInsertPoint())
945     return;
946   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
947   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
948     auto IRef = C->varlist_begin();
949     for (const Expr *IInit : C->private_copies()) {
950       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
951       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
952         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
953         EmitDecl(*VD);
954         // Emit private VarDecl with copy init.
955         bool IsRegistered =
956             PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
957         assert(IsRegistered && "private var already registered as private");
958         // Silence the warning about unused variable.
959         (void)IsRegistered;
960       }
961       ++IRef;
962     }
963   }
964 }
965 
// Emit copying of threadprivate variables listed in 'copyin' clauses from the
// master thread's copies into each thread's copies, guarded by an
// address-inequality check so the master thread skips the copy. Returns true
// iff any copyin code was emitted (callers then emit the trailing barrier).
EmitOMPCopyinClause(const OMPExecutableDirective & D)966 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
967   if (!HaveInsertPoint())
968     return false;
969   // threadprivate_var1 = master_threadprivate_var1;
970   // operator=(threadprivate_var2, master_threadprivate_var2);
971   // ...
972   // __kmpc_barrier(&loc, global_tid);
973   llvm::DenseSet<const VarDecl *> CopiedVars;
974   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
975   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
976     auto IRef = C->varlist_begin();
977     auto ISrcRef = C->source_exprs().begin();
978     auto IDestRef = C->destination_exprs().begin();
979     for (const Expr *AssignOp : C->assignment_ops()) {
980       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
981       QualType Type = VD->getType();
      // Copy each variable at most once, even across multiple clauses.
982       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
983         // Get the address of the master variable. If we are emitting code with
984         // TLS support, the address is passed from the master as field in the
985         // captured declaration.
986         Address MasterAddr = Address::invalid();
987         if (getLangOpts().OpenMPUseTLS &&
988             getContext().getTargetInfo().isTLSSupported()) {
989           assert(CapturedStmtInfo->lookup(VD) &&
990                  "Copyin threadprivates should have been captured!");
991           DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
992                           (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
993           MasterAddr = EmitLValue(&DRE).getAddress(*this);
          // Drop the mapping so the next EmitLValue(*IRef) below resolves to
          // this thread's threadprivate copy, not the captured master address.
994           LocalDeclMap.erase(VD);
995         } else {
996           MasterAddr =
997               Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
998                                           : CGM.GetAddrOfGlobal(VD),
999                       CGM.getTypes().ConvertTypeForMem(VD->getType()),
1000                       getContext().getDeclAlign(VD));
1001         }
1002         // Get the address of the threadprivate variable.
1003         Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
      // Emit the guard only once, before the first copy: if the private
      // address equals the master address we are on the master thread and
      // branch past all of the copies to CopyEnd.
1004         if (CopiedVars.size() == 1) {
1005           // At first check if current thread is a master thread. If it is, no
1006           // need to copy data.
1007           CopyBegin = createBasicBlock("copyin.not.master");
1008           CopyEnd = createBasicBlock("copyin.not.master.end");
1009           // TODO: Avoid ptrtoint conversion.
1010           auto *MasterAddrInt =
1011               Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
1012           auto *PrivateAddrInt =
1013               Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
1014           Builder.CreateCondBr(
1015               Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
1016               CopyEnd);
1017           EmitBlock(CopyBegin);
1018         }
1019         const auto *SrcVD =
1020             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1021         const auto *DestVD =
1022             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1023         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1024       }
1025       ++IRef;
1026       ++ISrcRef;
1027       ++IDestRef;
1028     }
1029   }
1030   if (CopyEnd) {
1031     // Exit out of copying procedure for non-master thread.
1032     EmitBlock(CopyEnd, /*IsFinished=*/true);
1033     return true;
1034   }
1035   return false;
1036 }
1037 
// Emit initialization for private copies of variables listed in 'lastprivate'
// clauses of \p D: record the original variable's address (for the final
// copy-back) and, unless the variable is also firstprivate or a simd loop
// control variable, emit the private copy itself. Returns true iff the
// directive has at least one lastprivate clause.
EmitOMPLastprivateClauseInit(const OMPExecutableDirective & D,OMPPrivateScope & PrivateScope)1038 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1039     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1040   if (!HaveInsertPoint())
1041     return false;
1042   bool HasAtLeastOneLastprivate = false;
  // Collect simd loop control variables: their private copies are created by
  // the loop emission itself, so they must not be re-emitted here.
1043   llvm::DenseSet<const VarDecl *> SIMDLCVs;
1044   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1045     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1046     for (const Expr *C : LoopDirective->counters()) {
1047       SIMDLCVs.insert(
1048           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1049     }
1050   }
1051   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1052   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1053     HasAtLeastOneLastprivate = true;
1054     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1055         !getLangOpts().OpenMPSimd)
1056       break;
1057     const auto *IRef = C->varlist_begin();
1058     const auto *IDestRef = C->destination_exprs().begin();
1059     for (const Expr *IInit : C->private_copies()) {
1060       // Keep the address of the original variable for future update at the end
1061       // of the loop.
1062       const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1063       // Taskloops do not require additional initialization, it is done in
1064       // runtime support library.
1065       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1066         const auto *DestVD =
1067             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1068         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1069                         /*RefersToEnclosingVariableOrCapture=*/
1070                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
1071                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        // Map the pseudo destination variable to the original address so the
        // final copy-back in EmitOMPLastprivateClauseFinal targets it.
1072         PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
1073         // Check if the variable is also a firstprivate: in this case IInit is
1074         // not generated. Initialization of this variable will happen in codegen
1075         // for 'firstprivate' clause.
1076         if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1077           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1078           Address VDAddr = Address::invalid();
1079           if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            // Conditional lastprivate uses runtime-managed storage instead of
            // a regular local alloca.
1080             VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1081                 *this, OrigVD);
1082             setAddrOfLocalVar(VD, VDAddr);
1083           } else {
1084             // Emit private VarDecl with copy init.
1085             EmitDecl(*VD);
1086             VDAddr = GetAddrOfLocalVar(VD);
1087           }
1088           bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
1089           assert(IsRegistered &&
1090                  "lastprivate var already registered as private");
1091           (void)IsRegistered;
1092         }
1093       }
1094       ++IRef;
1095       ++IDestRef;
1096     }
1097   }
1098   return HasAtLeastOneLastprivate;
1099 }
1100 
// Emit the final copy-back of lastprivate private copies into the original
// variables, guarded by \p IsLastIterCond when non-null (only the thread that
// executed the last iteration performs the copies). Loop counters get their
// final-value update expressions emitted first unless \p NoFinals is set.
EmitOMPLastprivateClauseFinal(const OMPExecutableDirective & D,bool NoFinals,llvm::Value * IsLastIterCond)1101 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1102     const OMPExecutableDirective &D, bool NoFinals,
1103     llvm::Value *IsLastIterCond) {
1104   if (!HaveInsertPoint())
1105     return;
1106   // Emit following code:
1107   // if (<IsLastIterCond>) {
1108   //   orig_var1 = private_orig_var1;
1109   //   ...
1110   //   orig_varn = private_orig_varn;
1111   // }
1112   llvm::BasicBlock *ThenBB = nullptr;
1113   llvm::BasicBlock *DoneBB = nullptr;
1114   if (IsLastIterCond) {
1115     // Emit implicit barrier if at least one lastprivate conditional is found
1116     // and this is not a simd mode.
1117     if (!getLangOpts().OpenMPSimd &&
1118         llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1119                      [](const OMPLastprivateClause *C) {
1120                        return C->getKind() == OMPC_LASTPRIVATE_conditional;
1121                      })) {
1122       CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1123                                              OMPD_unknown,
1124                                              /*EmitChecks=*/false,
1125                                              /*ForceSimpleCall=*/true);
1126     }
1127     ThenBB = createBasicBlock(".omp.lastprivate.then");
1128     DoneBB = createBasicBlock(".omp.lastprivate.done");
1129     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1130     EmitBlock(ThenBB);
1131   }
1132   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // For loop directives, map each loop counter to its 'final' update
  // expression (or mark it already-done when NoFinals suppresses finals).
  // NOTE(review): the loop variable 'D' below shadows the directive
  // parameter 'D'.
1133   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1134   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1135     auto IC = LoopDirective->counters().begin();
1136     for (const Expr *F : LoopDirective->finals()) {
1137       const auto *D =
1138           cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1139       if (NoFinals)
1140         AlreadyEmittedVars.insert(D);
1141       else
1142         LoopCountersAndUpdates[D] = F;
1143       ++IC;
1144     }
1145   }
1146   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1147     auto IRef = C->varlist_begin();
1148     auto ISrcRef = C->source_exprs().begin();
1149     auto IDestRef = C->destination_exprs().begin();
1150     for (const Expr *AssignOp : C->assignment_ops()) {
1151       const auto *PrivateVD =
1152           cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1153       QualType Type = PrivateVD->getType();
1154       const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back at most once, across all clauses.
1155       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1156         // If lastprivate variable is a loop control variable for loop-based
1157         // directive, update its value before copyin back to original
1158         // variable.
1159         if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1160           EmitIgnoredExpr(FinalExpr);
1161         const auto *SrcVD =
1162             cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1163         const auto *DestVD =
1164             cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1165         // Get the address of the private variable.
1166         Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For reference-typed privates, load through the reference to reach
        // the referenced storage.
1167         if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1168           PrivateAddr = Address(
1169               Builder.CreateLoad(PrivateAddr),
1170               CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
1171               CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1172         // Store the last value to the private copy in the last iteration.
1173         if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1174           CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1175               *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1176               (*IRef)->getExprLoc());
1177         // Get the address of the original variable.
1178         Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1179         EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1180       }
1181       ++IRef;
1182       ++ISrcRef;
1183       ++IDestRef;
1184     }
1185     if (const Expr *PostUpdate = C->getPostUpdateExpr())
1186       EmitIgnoredExpr(PostUpdate);
1187   }
1188   if (IsLastIterCond)
1189     EmitBlock(DoneBB, /*IsFinished=*/true);
1190 }
1191 
EmitOMPReductionClauseInit(const OMPExecutableDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope,bool ForInscan)1192 void CodeGenFunction::EmitOMPReductionClauseInit(
1193     const OMPExecutableDirective &D,
1194     CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1195   if (!HaveInsertPoint())
1196     return;
1197   SmallVector<const Expr *, 4> Shareds;
1198   SmallVector<const Expr *, 4> Privates;
1199   SmallVector<const Expr *, 4> ReductionOps;
1200   SmallVector<const Expr *, 4> LHSs;
1201   SmallVector<const Expr *, 4> RHSs;
1202   OMPTaskDataTy Data;
1203   SmallVector<const Expr *, 4> TaskLHSs;
1204   SmallVector<const Expr *, 4> TaskRHSs;
1205   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1206     if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1207       continue;
1208     Shareds.append(C->varlist_begin(), C->varlist_end());
1209     Privates.append(C->privates().begin(), C->privates().end());
1210     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1211     LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1212     RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1213     if (C->getModifier() == OMPC_REDUCTION_task) {
1214       Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1215       Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1216       Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1217       Data.ReductionOps.append(C->reduction_ops().begin(),
1218                                C->reduction_ops().end());
1219       TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1220       TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1221     }
1222   }
1223   ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1224   unsigned Count = 0;
1225   auto *ILHS = LHSs.begin();
1226   auto *IRHS = RHSs.begin();
1227   auto *IPriv = Privates.begin();
1228   for (const Expr *IRef : Shareds) {
1229     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1230     // Emit private VarDecl with reduction init.
1231     RedCG.emitSharedOrigLValue(*this, Count);
1232     RedCG.emitAggregateType(*this, Count);
1233     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1234     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1235                              RedCG.getSharedLValue(Count).getAddress(*this),
1236                              [&Emission](CodeGenFunction &CGF) {
1237                                CGF.EmitAutoVarInit(Emission);
1238                                return true;
1239                              });
1240     EmitAutoVarCleanups(Emission);
1241     Address BaseAddr = RedCG.adjustPrivateAddress(
1242         *this, Count, Emission.getAllocatedAddress());
1243     bool IsRegistered =
1244         PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
1245     assert(IsRegistered && "private var already registered as private");
1246     // Silence the warning about unused variable.
1247     (void)IsRegistered;
1248 
1249     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1250     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1251     QualType Type = PrivateVD->getType();
1252     bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1253     if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1254       // Store the address of the original variable associated with the LHS
1255       // implicit variable.
1256       PrivateScope.addPrivate(LHSVD,
1257                               RedCG.getSharedLValue(Count).getAddress(*this));
1258       PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
1259     } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1260                isa<ArraySubscriptExpr>(IRef)) {
1261       // Store the address of the original variable associated with the LHS
1262       // implicit variable.
1263       PrivateScope.addPrivate(LHSVD,
1264                               RedCG.getSharedLValue(Count).getAddress(*this));
1265       PrivateScope.addPrivate(RHSVD,
1266                               GetAddrOfLocalVar(PrivateVD).withElementType(
1267                                   ConvertTypeForMem(RHSVD->getType())));
1268     } else {
1269       QualType Type = PrivateVD->getType();
1270       bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1271       Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1272       // Store the address of the original variable associated with the LHS
1273       // implicit variable.
1274       if (IsArray) {
1275         OriginalAddr =
1276             OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
1277       }
1278       PrivateScope.addPrivate(LHSVD, OriginalAddr);
1279       PrivateScope.addPrivate(
1280           RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
1281                                ConvertTypeForMem(RHSVD->getType()))
1282                          : GetAddrOfLocalVar(PrivateVD));
1283     }
1284     ++ILHS;
1285     ++IRHS;
1286     ++IPriv;
1287     ++Count;
1288   }
1289   if (!Data.ReductionVars.empty()) {
1290     Data.IsReductionWithTaskMod = true;
1291     Data.IsWorksharingReduction =
1292         isOpenMPWorksharingDirective(D.getDirectiveKind());
1293     llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1294         *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1295     const Expr *TaskRedRef = nullptr;
1296     switch (D.getDirectiveKind()) {
1297     case OMPD_parallel:
1298       TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1299       break;
1300     case OMPD_for:
1301       TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1302       break;
1303     case OMPD_sections:
1304       TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1305       break;
1306     case OMPD_parallel_for:
1307       TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1308       break;
1309     case OMPD_parallel_master:
1310       TaskRedRef =
1311           cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1312       break;
1313     case OMPD_parallel_sections:
1314       TaskRedRef =
1315           cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1316       break;
1317     case OMPD_target_parallel:
1318       TaskRedRef =
1319           cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1320       break;
1321     case OMPD_target_parallel_for:
1322       TaskRedRef =
1323           cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1324       break;
1325     case OMPD_distribute_parallel_for:
1326       TaskRedRef =
1327           cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1328       break;
1329     case OMPD_teams_distribute_parallel_for:
1330       TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1331                        .getTaskReductionRefExpr();
1332       break;
1333     case OMPD_target_teams_distribute_parallel_for:
1334       TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1335                        .getTaskReductionRefExpr();
1336       break;
1337     case OMPD_simd:
1338     case OMPD_for_simd:
1339     case OMPD_section:
1340     case OMPD_single:
1341     case OMPD_master:
1342     case OMPD_critical:
1343     case OMPD_parallel_for_simd:
1344     case OMPD_task:
1345     case OMPD_taskyield:
1346     case OMPD_error:
1347     case OMPD_barrier:
1348     case OMPD_taskwait:
1349     case OMPD_taskgroup:
1350     case OMPD_flush:
1351     case OMPD_depobj:
1352     case OMPD_scan:
1353     case OMPD_ordered:
1354     case OMPD_atomic:
1355     case OMPD_teams:
1356     case OMPD_target:
1357     case OMPD_cancellation_point:
1358     case OMPD_cancel:
1359     case OMPD_target_data:
1360     case OMPD_target_enter_data:
1361     case OMPD_target_exit_data:
1362     case OMPD_taskloop:
1363     case OMPD_taskloop_simd:
1364     case OMPD_master_taskloop:
1365     case OMPD_master_taskloop_simd:
1366     case OMPD_parallel_master_taskloop:
1367     case OMPD_parallel_master_taskloop_simd:
1368     case OMPD_distribute:
1369     case OMPD_target_update:
1370     case OMPD_distribute_parallel_for_simd:
1371     case OMPD_distribute_simd:
1372     case OMPD_target_parallel_for_simd:
1373     case OMPD_target_simd:
1374     case OMPD_teams_distribute:
1375     case OMPD_teams_distribute_simd:
1376     case OMPD_teams_distribute_parallel_for_simd:
1377     case OMPD_target_teams:
1378     case OMPD_target_teams_distribute:
1379     case OMPD_target_teams_distribute_parallel_for_simd:
1380     case OMPD_target_teams_distribute_simd:
1381     case OMPD_declare_target:
1382     case OMPD_end_declare_target:
1383     case OMPD_threadprivate:
1384     case OMPD_allocate:
1385     case OMPD_declare_reduction:
1386     case OMPD_declare_mapper:
1387     case OMPD_declare_simd:
1388     case OMPD_requires:
1389     case OMPD_declare_variant:
1390     case OMPD_begin_declare_variant:
1391     case OMPD_end_declare_variant:
1392     case OMPD_unknown:
1393     default:
1394       llvm_unreachable("Enexpected directive with task reductions.");
1395     }
1396 
1397     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1398     EmitVarDecl(*VD);
1399     EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1400                       /*Volatile=*/false, TaskRedRef->getType());
1401   }
1402 }
1403 
EmitOMPReductionClauseFinal(const OMPExecutableDirective & D,const OpenMPDirectiveKind ReductionKind)1404 void CodeGenFunction::EmitOMPReductionClauseFinal(
1405     const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1406   if (!HaveInsertPoint())
1407     return;
1408   llvm::SmallVector<const Expr *, 8> Privates;
1409   llvm::SmallVector<const Expr *, 8> LHSExprs;
1410   llvm::SmallVector<const Expr *, 8> RHSExprs;
1411   llvm::SmallVector<const Expr *, 8> ReductionOps;
1412   bool HasAtLeastOneReduction = false;
1413   bool IsReductionWithTaskMod = false;
1414   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1415     // Do not emit for inscan reductions.
1416     if (C->getModifier() == OMPC_REDUCTION_inscan)
1417       continue;
1418     HasAtLeastOneReduction = true;
1419     Privates.append(C->privates().begin(), C->privates().end());
1420     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1421     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1422     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1423     IsReductionWithTaskMod =
1424         IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1425   }
1426   if (HasAtLeastOneReduction) {
1427     if (IsReductionWithTaskMod) {
1428       CGM.getOpenMPRuntime().emitTaskReductionFini(
1429           *this, D.getBeginLoc(),
1430           isOpenMPWorksharingDirective(D.getDirectiveKind()));
1431     }
1432     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1433                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
1434                       ReductionKind == OMPD_simd;
1435     bool SimpleReduction = ReductionKind == OMPD_simd;
1436     // Emit nowait reduction if nowait clause is present or directive is a
1437     // parallel directive (it always has implicit barrier).
1438     CGM.getOpenMPRuntime().emitReduction(
1439         *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1440         {WithNowait, SimpleReduction, ReductionKind});
1441   }
1442 }
1443 
emitPostUpdateForReductionClause(CodeGenFunction & CGF,const OMPExecutableDirective & D,const llvm::function_ref<llvm::Value * (CodeGenFunction &)> CondGen)1444 static void emitPostUpdateForReductionClause(
1445     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1446     const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1447   if (!CGF.HaveInsertPoint())
1448     return;
1449   llvm::BasicBlock *DoneBB = nullptr;
1450   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1451     if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1452       if (!DoneBB) {
1453         if (llvm::Value *Cond = CondGen(CGF)) {
1454           // If the first post-update expression is found, emit conditional
1455           // block if it was requested.
1456           llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1457           DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1458           CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1459           CGF.EmitBlock(ThenBB);
1460         }
1461       }
1462       CGF.EmitIgnoredExpr(PostUpdate);
1463     }
1464   }
1465   if (DoneBB)
1466     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1467 }
1468 
1469 namespace {
1470 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1471 /// parallel function. This is necessary for combined constructs such as
1472 /// 'distribute parallel for'
1473 typedef llvm::function_ref<void(CodeGenFunction &,
1474                                 const OMPExecutableDirective &,
1475                                 llvm::SmallVectorImpl<llvm::Value *> &)>
1476     CodeGenBoundParametersTy;
1477 } // anonymous namespace
1478 
1479 static void
checkForLastprivateConditionalUpdate(CodeGenFunction & CGF,const OMPExecutableDirective & S)1480 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1481                                      const OMPExecutableDirective &S) {
1482   if (CGF.getLangOpts().OpenMP < 50)
1483     return;
1484   llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1485   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1486     for (const Expr *Ref : C->varlists()) {
1487       if (!Ref->getType()->isScalarType())
1488         continue;
1489       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1490       if (!DRE)
1491         continue;
1492       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1493       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1494     }
1495   }
1496   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1497     for (const Expr *Ref : C->varlists()) {
1498       if (!Ref->getType()->isScalarType())
1499         continue;
1500       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1501       if (!DRE)
1502         continue;
1503       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1504       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1505     }
1506   }
1507   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1508     for (const Expr *Ref : C->varlists()) {
1509       if (!Ref->getType()->isScalarType())
1510         continue;
1511       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1512       if (!DRE)
1513         continue;
1514       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1515       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1516     }
1517   }
1518   // Privates should ne analyzed since they are not captured at all.
1519   // Task reductions may be skipped - tasks are ignored.
1520   // Firstprivates do not return value but may be passed by reference - no need
1521   // to check for updated lastprivate conditional.
1522   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1523     for (const Expr *Ref : C->varlists()) {
1524       if (!Ref->getType()->isScalarType())
1525         continue;
1526       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1527       if (!DRE)
1528         continue;
1529       PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1530     }
1531   }
1532   CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1533       CGF, S, PrivateDecls);
1534 }
1535 
/// Common codegen for parallel-based directives: outlines the region body,
/// evaluates the num_threads/proc_bind/if clauses, and emits the runtime call
/// that invokes the outlined function in parallel.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  // Outline the parallel region body into its own function.
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  // Only an 'if' clause without a name modifier, or with the 'parallel'
  // modifier, applies here.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}
1578 
isAllocatableDecl(const VarDecl * VD)1579 static bool isAllocatableDecl(const VarDecl *VD) {
1580   const VarDecl *CVD = VD->getCanonicalDecl();
1581   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1582     return false;
1583   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1584   // Use the default allocation.
1585   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1586             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1587            !AA->getAllocator());
1588 }
1589 
/// No-op CodeGenBoundParametersTy callback for directives that do not need to
/// forward 'distribute' loop bounds to the outlined parallel function.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1593 
emitOMPCopyinClause(CodeGenFunction & CGF,const OMPExecutableDirective & S)1594 static void emitOMPCopyinClause(CodeGenFunction &CGF,
1595                                 const OMPExecutableDirective &S) {
1596   bool Copyins = CGF.EmitOMPCopyinClause(S);
1597   if (Copyins) {
1598     // Emit implicit barrier to synchronize threads and avoid data races on
1599     // propagation master's thread values of threadprivate variables to local
1600     // instances of that variables of all other implicit threads.
1601     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1602         CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1603         /*ForceSimpleCall=*/true);
1604   }
1605 }
1606 
/// Allocate storage for a local variable that uses an OpenMP custom allocator
/// (via an OMPAllocateDeclAttr) through the OpenMPIRBuilder's omp_alloc
/// runtime call; registers an omp_free cleanup for scope exit. Returns an
/// invalid Address when \p VD is null or does not need custom allocation, so
/// the caller falls back to the normal alloca path.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified (VLA-like) types: size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round the size up to the declared alignment.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  // Pair the allocation with an omp_free call, run when the enclosing scope
  // is left (normal and EH paths).
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  // Cast the raw allocation to a pointer to the variable's memory type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}
1656 
getAddrOfThreadPrivate(CodeGenFunction & CGF,const VarDecl * VD,Address VDAddr,SourceLocation Loc)1657 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1658     CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1659     SourceLocation Loc) {
1660   CodeGenModule &CGM = CGF.CGM;
1661   if (CGM.getLangOpts().OpenMPUseTLS &&
1662       CGM.getContext().getTargetInfo().isTLSSupported())
1663     return VDAddr;
1664 
1665   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1666 
1667   llvm::Type *VarTy = VDAddr.getElementType();
1668   llvm::Value *Data =
1669       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1670   llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1671   std::string Suffix = getNameWithSeparators({"cache", ""});
1672   llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1673 
1674   llvm::CallInst *ThreadPrivateCacheCall =
1675       OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1676 
1677   return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1678 }
1679 
getNameWithSeparators(ArrayRef<StringRef> Parts,StringRef FirstSeparator,StringRef Separator)1680 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1681     ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1682   SmallString<128> Buffer;
1683   llvm::raw_svector_ostream OS(Buffer);
1684   StringRef Sep = FirstSeparator;
1685   for (StringRef Part : Parts) {
1686     OS << Sep << Part;
1687     Sep = Separator;
1688   }
1689   return OS.str().str();
1690 }
1691 
EmitOMPInlinedRegionBody(CodeGenFunction & CGF,const Stmt * RegionBodyStmt,InsertPointTy AllocaIP,InsertPointTy CodeGenIP,Twine RegionName)1692 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1693     CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1694     InsertPointTy CodeGenIP, Twine RegionName) {
1695   CGBuilderTy &Builder = CGF.Builder;
1696   Builder.restoreIP(CodeGenIP);
1697   llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1698                                                "." + RegionName + ".after");
1699 
1700   {
1701     OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1702     CGF.EmitStmt(RegionBodyStmt);
1703   }
1704 
1705   if (Builder.saveIP().isSet())
1706     Builder.CreateBr(FiniBB);
1707 }
1708 
EmitOMPOutlinedRegionBody(CodeGenFunction & CGF,const Stmt * RegionBodyStmt,InsertPointTy AllocaIP,InsertPointTy CodeGenIP,Twine RegionName)1709 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1710     CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1711     InsertPointTy CodeGenIP, Twine RegionName) {
1712   CGBuilderTy &Builder = CGF.Builder;
1713   Builder.restoreIP(CodeGenIP);
1714   llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1715                                                "." + RegionName + ".after");
1716 
1717   {
1718     OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1719     CGF.EmitStmt(RegionBodyStmt);
1720   }
1721 
1722   if (Builder.saveIP().isSet())
1723     Builder.CreateBr(FiniBB);
1724 }
1725 
/// Emit a '#pragma omp parallel' directive, either through the
/// OpenMPIRBuilder (when enabled) or via the classic CGOpenMPRuntime path.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    // Callback that emits the captured parallel region body at the given
    // insertion points.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
1807 
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  // Emit the if statement that encodes the metadirective's variant selection
  // (built earlier in the frontend; see OMPMetaDirective::getIfStmt).
  EmitStmt(S.getIfStmt());
}
1811 
1812 namespace {
1813 /// RAII to handle scopes for loop transformation directives.
1814 class OMPTransformDirectiveScopeRAII {
1815   OMPLoopScope *Scope = nullptr;
1816   CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1817   CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1818 
1819   OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1820       delete;
1821   OMPTransformDirectiveScopeRAII &
1822   operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1823 
1824 public:
OMPTransformDirectiveScopeRAII(CodeGenFunction & CGF,const Stmt * S)1825   OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1826     if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1827       Scope = new OMPLoopScope(CGF, *Dir);
1828       CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1829       CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1830     }
1831   }
~OMPTransformDirectiveScopeRAII()1832   ~OMPTransformDirectiveScopeRAII() {
1833     if (!Scope)
1834       return;
1835     delete CapInfoRAII;
1836     delete CGSI;
1837     delete Scope;
1838   }
1839 };
1840 } // namespace
1841 
/// Recursively emit the statements of a collapsed loop nest's body: walks
/// through compound statements and, for each expected nested loop (up to
/// \p MaxLevel associated loops), resolves loop transformation directives,
/// canonical-loop wrappers and range-based for loop variables before
/// descending into the loop body.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    // Loop transformation directives stand in for their generated loops.
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      // Emit the range-based for's loop variable declaration before its body.
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    // Descend into the next associated loop if more levels are expected.
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}
1880 
/// Emit one iteration of an OpenMP loop directive's body: updates loop
/// counters and linear variables, checks non-rectangular bounds, sets up
/// inscan dispatch blocks when needed, and emits the innermost body.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1948 
/// Pair of an outlined captured-statement function and the pointer to its
/// captured-variable closure struct.
using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1950 
1951 /// Emit a captured statement and return the function as well as its captured
1952 /// closure context.
emitCapturedStmtFunc(CodeGenFunction & ParentCGF,const CapturedStmt * S)1953 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1954                                              const CapturedStmt *S) {
1955   LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1956   CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1957   std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1958       std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1959   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1960   llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1961 
1962   return {F, CapStruct.getPointer(ParentCGF)};
1963 }
1964 
1965 /// Emit a call to a previously captured closure.
1966 static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction & ParentCGF,EmittedClosureTy Cap,llvm::ArrayRef<llvm::Value * > Args)1967 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1968                      llvm::ArrayRef<llvm::Value *> Args) {
1969   // Append the closure context to the argument.
1970   SmallVector<llvm::Value *> EffectiveArgs;
1971   EffectiveArgs.reserve(Args.size() + 1);
1972   llvm::append_range(EffectiveArgs, Args);
1973   EffectiveArgs.push_back(Cap.second);
1974 
1975   return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1976 }
1977 
1978 llvm::CanonicalLoopInfo *
EmitOMPCollapsedCanonicalLoopNest(const Stmt * S,int Depth)1979 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1980   assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1981 
1982   // The caller is processing the loop-associated directive processing the \p
1983   // Depth loops nested in \p S. Put the previous pending loop-associated
1984   // directive to the stack. If the current loop-associated directive is a loop
1985   // transformation directive, it will push its generated loops onto the stack
1986   // such that together with the loops left here they form the combined loop
1987   // nest for the parent loop-associated directive.
1988   int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
1989   ExpectedOMPLoopDepth = Depth;
1990 
1991   EmitStmt(S);
1992   assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1993 
1994   // The last added loop is the outermost one.
1995   llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
1996 
1997   // Pop the \p Depth loops requested by the call from that stack and restore
1998   // the previous context.
1999   OMPLoopNestStack.pop_back_n(Depth);
2000   ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2001 
2002   return Result;
2003 }
2004 
/// Emit an OMPCanonicalLoop through the OpenMPIRBuilder: the trip count is
/// computed by the pre-synthesized "distance" closure and the user's loop
/// variable is derived from the logical induction variable by the "loop var"
/// closure. Without -fopenmp-enable-irbuilder this falls back to emitting the
/// syntactic loop directly.
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    // Range-based for: emit the range/begin/end/loop-variable declarations in
    // source order so later closures can refer to them.
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come. The distance closure writes the count through its first (reference)
  // parameter, so allocate a temporary of that type and load it back.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress(*this);
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.getPointer(), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}
2079 
EmitOMPInnerLoop(const OMPExecutableDirective & S,bool RequiresCleanup,const Expr * LoopCond,const Expr * IncExpr,const llvm::function_ref<void (CodeGenFunction &)> BodyGen,const llvm::function_ref<void (CodeGenFunction &)> PostIncGen)2080 void CodeGenFunction::EmitOMPInnerLoop(
2081     const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2082     const Expr *IncExpr,
2083     const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2084     const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2085   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2086 
2087   // Start the loop with a block that tests the condition.
2088   auto CondBlock = createBasicBlock("omp.inner.for.cond");
2089   EmitBlock(CondBlock);
2090   const SourceRange R = S.getSourceRange();
2091 
2092   // If attributes are attached, push to the basic block with them.
2093   const auto &OMPED = cast<OMPExecutableDirective>(S);
2094   const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2095   const Stmt *SS = ICS->getCapturedStmt();
2096   const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2097   OMPLoopNestStack.clear();
2098   if (AS)
2099     LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2100                    AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2101                    SourceLocToDebugLoc(R.getEnd()));
2102   else
2103     LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2104                    SourceLocToDebugLoc(R.getEnd()));
2105 
2106   // If there are any cleanups between here and the loop-exit scope,
2107   // create a block to stage a loop exit along.
2108   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2109   if (RequiresCleanup)
2110     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2111 
2112   llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2113 
2114   // Emit condition.
2115   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2116   if (ExitBlock != LoopExit.getBlock()) {
2117     EmitBlock(ExitBlock);
2118     EmitBranchThroughCleanup(LoopExit);
2119   }
2120 
2121   EmitBlock(LoopBody);
2122   incrementProfileCounter(&S);
2123 
2124   // Create a block for the increment.
2125   JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2126   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2127 
2128   BodyGen(*this);
2129 
2130   // Emit "IV = IV + 1" and a back-edge to the condition block.
2131   EmitBlock(Continue.getBlock());
2132   EmitIgnoredExpr(IncExpr);
2133   PostIncGen(*this);
2134   BreakContinueStack.pop_back();
2135   EmitBranch(CondBlock);
2136   LoopStack.pop();
2137   // Emit the fall-through block.
2138   EmitBlock(LoopExit.getBlock());
2139 }
2140 
/// Emit allocation and initialization for the variables of all 'linear'
/// clauses on \p D. Returns true if any linear-clause init was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init expression refers directly to another variable: allocate
        // the copy and initialize it through a fresh DeclRefExpr so a capture
        // of the original (if any) is resolved correctly.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            &DRE, VD,
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        // Otherwise the default emission path handles the initializer.
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    // NOTE(review): cast<> asserts on type mismatch and never yields null, so
    // the inner condition is always true whenever getCalcStep() is non-null.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
2178 
/// Emit the final-value assignments for all 'linear' clauses of \p D. If
/// \p CondGen produces a condition, the updates are guarded by it (emitted
/// only on the branch where the condition holds).
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Remap the original variable to its own address so the final
      // expression writes through to the original storage.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2216 
emitAlignedClause(CodeGenFunction & CGF,const OMPExecutableDirective & D)2217 static void emitAlignedClause(CodeGenFunction &CGF,
2218                               const OMPExecutableDirective &D) {
2219   if (!CGF.HaveInsertPoint())
2220     return;
2221   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2222     llvm::APInt ClauseAlignment(64, 0);
2223     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2224       auto *AlignmentCI =
2225           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2226       ClauseAlignment = AlignmentCI->getValue();
2227     }
2228     for (const Expr *E : Clause->varlists()) {
2229       llvm::APInt Alignment(ClauseAlignment);
2230       if (Alignment == 0) {
2231         // OpenMP [2.8.1, Description]
2232         // If no optional parameter is specified, implementation-defined default
2233         // alignments for SIMD instructions on the target platforms are assumed.
2234         Alignment =
2235             CGF.getContext()
2236                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2237                     E->getType()->getPointeeType()))
2238                 .getQuantity();
2239       }
2240       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2241              "alignment is not power of 2");
2242       if (Alignment != 0) {
2243         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2244         CGF.emitAlignmentAssumption(
2245             PtrValue, E, /*No second loc needed*/ SourceLocation(),
2246             llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2247       }
2248     }
2249   }
2250 }
2251 
/// Privatize the loop counters of \p S (and the extra counters required by
/// 'ordered(n)' clauses) into \p LoopScope.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    // Remove PrivateVD from the local map so it can be re-registered in the
    // private scope below without conflicting with the alloca emission.
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is addressable here: alias the private counter
      // to the original's storage (resolved through a DeclRefExpr so a
      // captured copy is found if present).
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this));
    } else {
      (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            VD, CreateMemTemp(DRE->getType(), VD->getName()));
      }
    }
  }
}
2293 
/// Emit the pre-condition check of an OpenMP loop: branch to \p TrueBlock if
/// the loop executes at least once, otherwise to \p FalseBlock. Loop counters
/// are temporarily privatized so evaluating \p Cond has no visible effect.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    // Scope the privatized counters so they do not leak past the check.
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
2331 
EmitOMPLinearClause(const OMPLoopDirective & D,CodeGenFunction::OMPPrivateScope & PrivateScope)2332 void CodeGenFunction::EmitOMPLinearClause(
2333     const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2334   if (!HaveInsertPoint())
2335     return;
2336   llvm::DenseSet<const VarDecl *> SIMDLCVs;
2337   if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2338     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2339     for (const Expr *C : LoopDirective->counters()) {
2340       SIMDLCVs.insert(
2341           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2342     }
2343   }
2344   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2345     auto CurPrivate = C->privates().begin();
2346     for (const Expr *E : C->varlists()) {
2347       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2348       const auto *PrivateVD =
2349           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2350       if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2351         // Emit private VarDecl with copy init.
2352         EmitVarDecl(*PrivateVD);
2353         bool IsRegistered =
2354             PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
2355         assert(IsRegistered && "linear var already registered as private");
2356         // Silence the warning about unused variable.
2357         (void)IsRegistered;
2358       } else {
2359         EmitVarDecl(*PrivateVD);
2360       }
2361       ++CurPrivate;
2362     }
2363   }
2364 }
2365 
emitSimdlenSafelenClause(CodeGenFunction & CGF,const OMPExecutableDirective & D)2366 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2367                                      const OMPExecutableDirective &D) {
2368   if (!CGF.HaveInsertPoint())
2369     return;
2370   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2371     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2372                                  /*ignoreResult=*/true);
2373     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2374     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2375     // In presence of finite 'safelen', it may be unsafe to mark all
2376     // the memory instructions parallel, because loop-carried
2377     // dependences of 'safelen' iterations are possible.
2378     CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2379   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2380     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2381                                  /*ignoreResult=*/true);
2382     auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2383     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2384     // In presence of finite 'safelen', it may be unsafe to mark all
2385     // the memory instructions parallel, because loop-carried
2386     // dependences of 'safelen' iterations are possible.
2387     CGF.LoopStack.setParallel(/*Enable=*/false);
2388   }
2389 }
2390 
EmitOMPSimdInit(const OMPLoopDirective & D)2391 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2392   // Walk clauses and process safelen/lastprivate.
2393   LoopStack.setParallel(/*Enable=*/true);
2394   LoopStack.setVectorizeEnable();
2395   emitSimdlenSafelenClause(*this, D);
2396   if (const auto *C = D.getSingleClause<OMPOrderClause>())
2397     if (C->getKind() == OMPC_ORDER_concurrent)
2398       LoopStack.setParallel(/*Enable=*/true);
2399   if ((D.getDirectiveKind() == OMPD_simd ||
2400        (getLangOpts().OpenMPSimd &&
2401         isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2402       llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2403                    [](const OMPReductionClause *C) {
2404                      return C->getModifier() == OMPC_REDUCTION_inscan;
2405                    }))
2406     // Disable parallel access in case of prefix sum.
2407     LoopStack.setParallel(/*Enable=*/false);
2408 }
2409 
/// Emit the final-value updates for the loop counters of a simd region,
/// optionally guarded by the condition produced by \p CondGen.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // Only update counters that are visible outside the loop (locals,
    // captures, globals, or captured expressions).
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Resolve the address of the original counter: through the captured
      // expression's init for OMPCapturedExprDecl, otherwise through the
      // private counter's storage.
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2455 
/// Emit the loop body of \p S and then a stop point (debug location) for the
/// directive.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
2462 
2463 /// Emit a helper variable and return corresponding lvalue.
EmitOMPHelperVar(CodeGenFunction & CGF,const DeclRefExpr * Helper)2464 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2465                                const DeclRefExpr *Helper) {
2466   auto VDecl = cast<VarDecl>(Helper->getDecl());
2467   CGF.EmitVarDecl(*VDecl);
2468   return CGF.EmitLValue(Helper);
2469 }
2470 
/// Emit the common structure of a simd loop, honoring an applicable 'if'
/// clause: the then-branch runs \p SimdInitGen before \p BodyCodeGen, the
/// else-branch disables vectorization and runs \p BodyCodeGen only.
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  // Look for an 'if' clause that applies to the simd part (OpenMP >= 5.0
  // allows the 'simd' name modifier).
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No applicable 'if' clause: emit the then-branch unconditionally.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2506 
/// Emit the body of a simd (or simd-like) directive: privatization, the
/// pre-condition guard, the inner loop, and the final counter/linear updates.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // Worksharing/distribute/taskloop forms carry explicit bound variables
  // that must be materialized before use.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Privatization scope for the loop body: private/counter/linear/reduction
    // variables are remapped here and restored at the end of the block.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  }
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
2602 
isSupportedByOpenMPIRBuilder(const OMPSimdDirective & S)2603 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2604   // Check for unsupported clauses
2605   for (OMPClause *C : S.clauses()) {
2606     // Currently only order, simdlen and safelen clauses are supported
2607     if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2608           isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2609       return false;
2610   }
2611 
2612   // Check if we have a statement with the ordered directive.
2613   // Visit the statement hierarchy to find a compound statement
2614   // with a ordered directive in it.
2615   if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2616     if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2617       for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2618         if (!SubStmt)
2619           continue;
2620         if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2621           for (const Stmt *CSSubStmt : CS->children()) {
2622             if (!CSSubStmt)
2623               continue;
2624             if (isa<OMPOrderedDirective>(CSSubStmt)) {
2625               return false;
2626             }
2627           }
2628         }
2629       }
2630     }
2631   }
2632   return true;
2633 }
2634 static llvm::MapVector<llvm::Value *, llvm::Value *>
GetAlignedMapping(const OMPSimdDirective & S,CodeGenFunction & CGF)2635 GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
2636   llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2637   for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2638     llvm::APInt ClauseAlignment(64, 0);
2639     if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2640       auto *AlignmentCI =
2641           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2642       ClauseAlignment = AlignmentCI->getValue();
2643     }
2644     for (const Expr *E : Clause->varlists()) {
2645       llvm::APInt Alignment(ClauseAlignment);
2646       if (Alignment == 0) {
2647         // OpenMP [2.8.1, Description]
2648         // If no optional parameter is specified, implementation-defined default
2649         // alignments for SIMD instructions on the target platforms are assumed.
2650         Alignment =
2651             CGF.getContext()
2652                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2653                     E->getType()->getPointeeType()))
2654                 .getQuantity();
2655       }
2656       assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2657              "alignment is not power of 2");
2658       llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2659       AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2660     }
2661   }
2662   return AlignedVars;
2663 }
2664 
EmitOMPSimdDirective(const OMPSimdDirective & S)2665 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2666   bool UseOMPIRBuilder =
2667       CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2668   if (UseOMPIRBuilder) {
2669     auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2670                                                           PrePostActionTy &) {
2671       // Use the OpenMPIRBuilder if enabled.
2672       if (UseOMPIRBuilder) {
2673         llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2674             GetAlignedMapping(S, CGF);
2675         // Emit the associated statement and get its loop representation.
2676         const Stmt *Inner = S.getRawStmt();
2677         llvm::CanonicalLoopInfo *CLI =
2678             EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2679 
2680         llvm::OpenMPIRBuilder &OMPBuilder =
2681             CGM.getOpenMPRuntime().getOMPBuilder();
2682         // Add SIMD specific metadata
2683         llvm::ConstantInt *Simdlen = nullptr;
2684         if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2685           RValue Len =
2686               this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2687                                 /*ignoreResult=*/true);
2688           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2689           Simdlen = Val;
2690         }
2691         llvm::ConstantInt *Safelen = nullptr;
2692         if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2693           RValue Len =
2694               this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2695                                 /*ignoreResult=*/true);
2696           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2697           Safelen = Val;
2698         }
2699         llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2700         if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2701           if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
2702             Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2703           }
2704         }
2705         // Add simd metadata to the collapsed loop. Do not generate
2706         // another loop for if clause. Support for if clause is done earlier.
2707         OMPBuilder.applySimd(CLI, AlignedVars,
2708                              /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2709         return;
2710       }
2711     };
2712     {
2713       auto LPCRegion =
2714           CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2715       OMPLexicalScope Scope(*this, S, OMPD_unknown);
2716       CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
2717                                                   CodeGenIRBuilder);
2718     }
2719     return;
2720   }
2721 
2722   ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2723   OMPFirstScanLoop = true;
2724   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2725     emitOMPSimdRegion(CGF, S, Action);
2726   };
2727   {
2728     auto LPCRegion =
2729         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2730     OMPLexicalScope Scope(*this, S, OMPD_unknown);
2731     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2732   }
2733   // Check for outer lastprivate conditional update.
2734   checkForLastprivateConditionalUpdate(*this, S);
2735 }
2736 
void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // The tiling itself was performed as an AST-level transformation; here we
  // only emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}
2742 
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    // If this unroll is nested inside another loop-associated construct, the
    // partially-unrolled outer loop must be handed back for the parent to
    // consume.
    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      // 'full': the loop disappears entirely, so no parent construct may be
      // expecting a loop from us.
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      // 'partial(factor)': Factor stays 0 when no argument is given, which
      // lets the IR builder choose a heuristic factor.
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      // No clause: leave the unrolling decision to the heuristics.
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}
2804 
// Emit the outer "dispatch" loop used by worksharing/distribute constructs
// whose schedule requires repeatedly fetching chunks at runtime (dynamic or
// ordered schedules) or advancing static chunks (chunked static schedules).
// The body of each chunk is emitted via CodeGenLoop; CodeGenOrdered emits the
// per-iteration 'ordered' bookkeeping where required.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Dynamic/ordered: ask the runtime for the next chunk; the call also
    // fills in LB/UB/ST and returns whether more work remains.
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
2920 
EmitOMPForOuterLoop(const OpenMPScheduleTy & ScheduleKind,bool IsMonotonic,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,bool Ordered,const OMPLoopArguments & LoopArgs,const CodeGenDispatchBoundsTy & CGDispatchBounds)2921 void CodeGenFunction::EmitOMPForOuterLoop(
2922     const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2923     const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2924     const OMPLoopArguments &LoopArgs,
2925     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2926   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2927 
2928   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2929   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
2930 
2931   assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2932                                             LoopArgs.Chunk != nullptr)) &&
2933          "static non-chunked schedule does not need outer loop");
2934 
2935   // Emit outer loop.
2936   //
2937   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2938   // When schedule(dynamic,chunk_size) is specified, the iterations are
2939   // distributed to threads in the team in chunks as the threads request them.
2940   // Each thread executes a chunk of iterations, then requests another chunk,
2941   // until no chunks remain to be distributed. Each chunk contains chunk_size
2942   // iterations, except for the last chunk to be distributed, which may have
2943   // fewer iterations. When no chunk_size is specified, it defaults to 1.
2944   //
2945   // When schedule(guided,chunk_size) is specified, the iterations are assigned
2946   // to threads in the team in chunks as the executing threads request them.
2947   // Each thread executes a chunk of iterations, then requests another chunk,
2948   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2949   // each chunk is proportional to the number of unassigned iterations divided
2950   // by the number of threads in the team, decreasing to 1. For a chunk_size
2951   // with value k (greater than 1), the size of each chunk is determined in the
2952   // same way, with the restriction that the chunks do not contain fewer than k
2953   // iterations (except for the last chunk to be assigned, which may have fewer
2954   // than k iterations).
2955   //
2956   // When schedule(auto) is specified, the decision regarding scheduling is
2957   // delegated to the compiler and/or runtime system. The programmer gives the
2958   // implementation the freedom to choose any possible mapping of iterations to
2959   // threads in the team.
2960   //
2961   // When schedule(runtime) is specified, the decision regarding scheduling is
2962   // deferred until run time, and the schedule and chunk size are taken from the
2963   // run-sched-var ICV. If the ICV is set to auto, the schedule is
2964   // implementation defined
2965   //
2966   // while(__kmpc_dispatch_next(&LB, &UB)) {
2967   //   idx = LB;
2968   //   while (idx <= UB) { BODY; ++idx;
2969   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2970   //   } // inner loop
2971   // }
2972   //
2973   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2974   // When schedule(static, chunk_size) is specified, iterations are divided into
2975   // chunks of size chunk_size, and the chunks are assigned to the threads in
2976   // the team in a round-robin fashion in the order of the thread number.
2977   //
2978   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2979   //   while (idx <= UB) { BODY; ++idx; } // inner loop
2980   //   LB = LB + ST;
2981   //   UB = UB + ST;
2982   // }
2983   //
2984 
2985   const Expr *IVExpr = S.getIterationVariable();
2986   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2987   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2988 
2989   if (DynamicOrOrdered) {
2990     const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2991         CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2992     llvm::Value *LBVal = DispatchBounds.first;
2993     llvm::Value *UBVal = DispatchBounds.second;
2994     CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2995                                                              LoopArgs.Chunk};
2996     RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2997                            IVSigned, Ordered, DipatchRTInputValues);
2998   } else {
2999     CGOpenMPRuntime::StaticRTInput StaticInit(
3000         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3001         LoopArgs.ST, LoopArgs.Chunk);
3002     RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3003                          ScheduleKind, StaticInit);
3004   }
3005 
3006   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3007                                     const unsigned IVSize,
3008                                     const bool IVSigned) {
3009     if (Ordered) {
3010       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3011                                                             IVSigned);
3012     }
3013   };
3014 
3015   OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3016                                  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3017   OuterLoopArgs.IncExpr = S.getInc();
3018   OuterLoopArgs.Init = S.getInit();
3019   OuterLoopArgs.Cond = S.getCond();
3020   OuterLoopArgs.NextLB = S.getNextLowerBound();
3021   OuterLoopArgs.NextUB = S.getNextUpperBound();
3022   EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3023                    emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3024 }
3025 
// No-op 'ordered' callback for constructs (e.g. 'distribute') where no
// per-iteration ordered bookkeeping has to be emitted.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
3028 
EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,const OMPLoopDirective & S,OMPPrivateScope & LoopScope,const OMPLoopArguments & LoopArgs,const CodeGenLoopTy & CodeGenLoopContent)3029 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3030     OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3031     OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3032     const CodeGenLoopTy &CodeGenLoopContent) {
3033 
3034   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3035 
3036   // Emit outer loop.
3037   // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3038   // dynamic
3039   //
3040 
3041   const Expr *IVExpr = S.getIterationVariable();
3042   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3043   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3044 
3045   CGOpenMPRuntime::StaticRTInput StaticInit(
3046       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3047       LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3048   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3049 
3050   // for combined 'distribute' and 'for' the increment expression of distribute
3051   // is stored in DistInc. For 'distribute' alone, it is in Inc.
3052   Expr *IncExpr;
3053   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3054     IncExpr = S.getDistInc();
3055   else
3056     IncExpr = S.getInc();
3057 
3058   // this routine is shared by 'omp distribute parallel for' and
3059   // 'omp distribute': select the right EUB expression depending on the
3060   // directive
3061   OMPLoopArguments OuterLoopArgs;
3062   OuterLoopArgs.LB = LoopArgs.LB;
3063   OuterLoopArgs.UB = LoopArgs.UB;
3064   OuterLoopArgs.ST = LoopArgs.ST;
3065   OuterLoopArgs.IL = LoopArgs.IL;
3066   OuterLoopArgs.Chunk = LoopArgs.Chunk;
3067   OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3068                           ? S.getCombinedEnsureUpperBound()
3069                           : S.getEnsureUpperBound();
3070   OuterLoopArgs.IncExpr = IncExpr;
3071   OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3072                            ? S.getCombinedInit()
3073                            : S.getInit();
3074   OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3075                            ? S.getCombinedCond()
3076                            : S.getCond();
3077   OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3078                              ? S.getCombinedNextLowerBound()
3079                              : S.getNextLowerBound();
3080   OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3081                              ? S.getCombinedNextUpperBound()
3082                              : S.getNextUpperBound();
3083 
3084   EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3085                    LoopScope, OuterLoopArgs, CodeGenLoopContent,
3086                    emitEmptyOrdered);
3087 }
3088 
3089 static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)3090 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3091                                      const OMPExecutableDirective &S) {
3092   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3093   LValue LB =
3094       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3095   LValue UB =
3096       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3097 
3098   // When composing 'distribute' with 'for' (e.g. as in 'distribute
3099   // parallel for') we need to use the 'distribute'
3100   // chunk lower and upper bounds rather than the whole loop iteration
3101   // space. These are parameters to the outlined function for 'parallel'
3102   // and we copy the bounds of the previous schedule into the
3103   // the current ones.
3104   LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3105   LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3106   llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3107       PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
3108   PrevLBVal = CGF.EmitScalarConversion(
3109       PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3110       LS.getIterationVariable()->getType(),
3111       LS.getPrevLowerBoundVariable()->getExprLoc());
3112   llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3113       PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3114   PrevUBVal = CGF.EmitScalarConversion(
3115       PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3116       LS.getIterationVariable()->getType(),
3117       LS.getPrevUpperBoundVariable()->getExprLoc());
3118 
3119   CGF.EmitStoreOfScalar(PrevLBVal, LB);
3120   CGF.EmitStoreOfScalar(PrevUBVal, UB);
3121 
3122   return {LB, UB};
3123 }
3124 
3125 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3126 /// we need to use the LB and UB expressions generated by the worksharing
3127 /// code generation support, whereas in non combined situations we would
3128 /// just emit 0 and the LastIteration expression
3129 /// This function is necessary due to the difference of the LB and UB
3130 /// types for the RT emission routines for 'for_static_init' and
3131 /// 'for_dispatch_init'
3132 static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)3133 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3134                                         const OMPExecutableDirective &S,
3135                                         Address LB, Address UB) {
3136   const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3137   const Expr *IVExpr = LS.getIterationVariable();
3138   // when implementing a dynamic schedule for a 'for' combined with a
3139   // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3140   // is not normalized as each team only executes its own assigned
3141   // distribute chunk
3142   QualType IteratorTy = IVExpr->getType();
3143   llvm::Value *LBVal =
3144       CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3145   llvm::Value *UBVal =
3146       CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3147   return {LBVal, UBVal};
3148 }
3149 
emitDistributeParallelForDistributeInnerBoundParams(CodeGenFunction & CGF,const OMPExecutableDirective & S,llvm::SmallVectorImpl<llvm::Value * > & CapturedVars)3150 static void emitDistributeParallelForDistributeInnerBoundParams(
3151     CodeGenFunction &CGF, const OMPExecutableDirective &S,
3152     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3153   const auto &Dir = cast<OMPLoopDirective>(S);
3154   LValue LB =
3155       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3156   llvm::Value *LBCast =
3157       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3158                                 CGF.SizeTy, /*isSigned=*/false);
3159   CapturedVars.push_back(LBCast);
3160   LValue UB =
3161       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3162 
3163   llvm::Value *UBCast =
3164       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3165                                 CGF.SizeTy, /*isSigned=*/false);
3166   CapturedVars.push_back(UBCast);
3167 }
3168 
// Emit the inner part of a combined 'distribute parallel for[ simd]': a
// 'parallel' region whose body runs the worksharing loop over the distribute
// chunk assigned to the current team.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Only the non-simd combined forms may contain a 'cancel' directive.
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    // Emit the worksharing loop using the distribute chunk's bounds rather
    // than the whole iteration space.
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
3199 
EmitOMPDistributeParallelForDirective(const OMPDistributeParallelForDirective & S)3200 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3201     const OMPDistributeParallelForDirective &S) {
3202   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3203     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3204                               S.getDistInc());
3205   };
3206   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3207   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3208 }
3209 
EmitOMPDistributeParallelForSimdDirective(const OMPDistributeParallelForSimdDirective & S)3210 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3211     const OMPDistributeParallelForSimdDirective &S) {
3212   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3213     CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3214                               S.getDistInc());
3215   };
3216   OMPLexicalScope Scope(*this, S, OMPD_parallel);
3217   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3218 }
3219 
EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective & S)3220 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3221     const OMPDistributeSimdDirective &S) {
3222   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3223     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3224   };
3225   OMPLexicalScope Scope(*this, S, OMPD_unknown);
3226   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3227 }
3228 
EmitOMPTargetSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetSimdDirective & S)3229 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3230     CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3231   // Emit SPMD target parallel for region as a standalone region.
3232   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3233     emitOMPSimdRegion(CGF, S, Action);
3234   };
3235   llvm::Function *Fn;
3236   llvm::Constant *Addr;
3237   // Emit target region as a standalone region.
3238   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3239       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3240   assert(Fn && Addr && "Target device function emission failed.");
3241 }
3242 
EmitOMPTargetSimdDirective(const OMPTargetSimdDirective & S)3243 void CodeGenFunction::EmitOMPTargetSimdDirective(
3244     const OMPTargetSimdDirective &S) {
3245   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3246     emitOMPSimdRegion(CGF, S, Action);
3247   };
3248   emitCommonOMPTargetDirective(*this, S, CodeGen);
3249 }
3250 
namespace {
/// Bundles a 'schedule' clause kind together with its (up to two) modifiers.
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3262 
EmitOMPWorksharingLoop(const OMPLoopDirective & S,Expr * EUB,const CodeGenLoopBoundsTy & CodeGenLoopBounds,const CodeGenDispatchBoundsTy & CGDispatchBounds)3263 bool CodeGenFunction::EmitOMPWorksharingLoop(
3264     const OMPLoopDirective &S, Expr *EUB,
3265     const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3266     const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3267   // Emit the loop iteration variable.
3268   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3269   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3270   EmitVarDecl(*IVDecl);
3271 
3272   // Emit the iterations count variable.
3273   // If it is not a variable, Sema decided to calculate iterations count on each
3274   // iteration (e.g., it is foldable into a constant).
3275   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3276     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3277     // Emit calculation of the iterations count.
3278     EmitIgnoredExpr(S.getCalcLastIteration());
3279   }
3280 
3281   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3282 
3283   bool HasLastprivateClause;
3284   // Check pre-condition.
3285   {
3286     OMPLoopScope PreInitScope(*this, S);
3287     // Skip the entire loop if we don't meet the precondition.
3288     // If the condition constant folds and can be elided, avoid emitting the
3289     // whole loop.
3290     bool CondConstant;
3291     llvm::BasicBlock *ContBlock = nullptr;
3292     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3293       if (!CondConstant)
3294         return false;
3295     } else {
3296       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3297       ContBlock = createBasicBlock("omp.precond.end");
3298       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3299                   getProfileCount(&S));
3300       EmitBlock(ThenBlock);
3301       incrementProfileCounter(&S);
3302     }
3303 
3304     RunCleanupsScope DoacrossCleanupScope(*this);
3305     bool Ordered = false;
3306     if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3307       if (OrderedClause->getNumForLoops())
3308         RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3309       else
3310         Ordered = true;
3311     }
3312 
3313     llvm::DenseSet<const Expr *> EmittedFinals;
3314     emitAlignedClause(*this, S);
3315     bool HasLinears = EmitOMPLinearClauseInit(S);
3316     // Emit helper vars inits.
3317 
3318     std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3319     LValue LB = Bounds.first;
3320     LValue UB = Bounds.second;
3321     LValue ST =
3322         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3323     LValue IL =
3324         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3325 
3326     // Emit 'then' code.
3327     {
3328       OMPPrivateScope LoopScope(*this);
3329       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3330         // Emit implicit barrier to synchronize threads and avoid data races on
3331         // initialization of firstprivate variables and post-update of
3332         // lastprivate variables.
3333         CGM.getOpenMPRuntime().emitBarrierCall(
3334             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3335             /*ForceSimpleCall=*/true);
3336       }
3337       EmitOMPPrivateClause(S, LoopScope);
3338       CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3339           *this, S, EmitLValue(S.getIterationVariable()));
3340       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3341       EmitOMPReductionClauseInit(S, LoopScope);
3342       EmitOMPPrivateLoopCounters(S, LoopScope);
3343       EmitOMPLinearClause(S, LoopScope);
3344       (void)LoopScope.Privatize();
3345       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3346         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3347 
3348       // Detect the loop schedule kind and chunk.
3349       const Expr *ChunkExpr = nullptr;
3350       OpenMPScheduleTy ScheduleKind;
3351       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3352         ScheduleKind.Schedule = C->getScheduleKind();
3353         ScheduleKind.M1 = C->getFirstScheduleModifier();
3354         ScheduleKind.M2 = C->getSecondScheduleModifier();
3355         ChunkExpr = C->getChunkSize();
3356       } else {
3357         // Default behaviour for schedule clause.
3358         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3359             *this, S, ScheduleKind.Schedule, ChunkExpr);
3360       }
3361       bool HasChunkSizeOne = false;
3362       llvm::Value *Chunk = nullptr;
3363       if (ChunkExpr) {
3364         Chunk = EmitScalarExpr(ChunkExpr);
3365         Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3366                                      S.getIterationVariable()->getType(),
3367                                      S.getBeginLoc());
3368         Expr::EvalResult Result;
3369         if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3370           llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3371           HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3372         }
3373       }
3374       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3375       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3376       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3377       // If the static schedule kind is specified or if the ordered clause is
3378       // specified, and if no monotonic modifier is specified, the effect will
3379       // be as if the monotonic modifier was specified.
3380       bool StaticChunkedOne =
3381           RT.isStaticChunked(ScheduleKind.Schedule,
3382                              /* Chunked */ Chunk != nullptr) &&
3383           HasChunkSizeOne &&
3384           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3385       bool IsMonotonic =
3386           Ordered ||
3387           (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3388            !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3389              ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3390           ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3391           ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3392       if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3393                                  /* Chunked */ Chunk != nullptr) ||
3394            StaticChunkedOne) &&
3395           !Ordered) {
3396         JumpDest LoopExit =
3397             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3398         emitCommonSimdLoop(
3399             *this, S,
3400             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3401               if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3402                 CGF.EmitOMPSimdInit(S);
3403               } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3404                 if (C->getKind() == OMPC_ORDER_concurrent)
3405                   CGF.LoopStack.setParallel(/*Enable=*/true);
3406               }
3407             },
3408             [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3409              &S, ScheduleKind, LoopExit,
3410              &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3411               // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3412               // When no chunk_size is specified, the iteration space is divided
3413               // into chunks that are approximately equal in size, and at most
3414               // one chunk is distributed to each thread. Note that the size of
3415               // the chunks is unspecified in this case.
3416               CGOpenMPRuntime::StaticRTInput StaticInit(
3417                   IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3418                   LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3419                   StaticChunkedOne ? Chunk : nullptr);
3420               CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3421                   CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3422                   StaticInit);
3423               // UB = min(UB, GlobalUB);
3424               if (!StaticChunkedOne)
3425                 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3426               // IV = LB;
3427               CGF.EmitIgnoredExpr(S.getInit());
3428               // For unchunked static schedule generate:
3429               //
3430               // while (idx <= UB) {
3431               //   BODY;
3432               //   ++idx;
3433               // }
3434               //
3435               // For static schedule with chunk one:
3436               //
3437               // while (IV <= PrevUB) {
3438               //   BODY;
3439               //   IV += ST;
3440               // }
3441               CGF.EmitOMPInnerLoop(
3442                   S, LoopScope.requiresCleanups(),
3443                   StaticChunkedOne ? S.getCombinedParForInDistCond()
3444                                    : S.getCond(),
3445                   StaticChunkedOne ? S.getDistInc() : S.getInc(),
3446                   [&S, LoopExit](CodeGenFunction &CGF) {
3447                     emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3448                   },
3449                   [](CodeGenFunction &) {});
3450             });
3451         EmitBlock(LoopExit.getBlock());
3452         // Tell the runtime we are done.
3453         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3454           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3455                                                          S.getDirectiveKind());
3456         };
3457         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3458       } else {
3459         // Emit the outer loop, which requests its work chunk [LB..UB] from
3460         // runtime and runs the inner loop to process it.
3461         const OMPLoopArguments LoopArguments(
3462             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3463             IL.getAddress(*this), Chunk, EUB);
3464         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3465                             LoopArguments, CGDispatchBounds);
3466       }
3467       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3468         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3469           return CGF.Builder.CreateIsNotNull(
3470               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3471         });
3472       }
3473       EmitOMPReductionClauseFinal(
3474           S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3475                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3476                  : /*Parallel only*/ OMPD_parallel);
3477       // Emit post-update of the reduction variables if IsLastIter != 0.
3478       emitPostUpdateForReductionClause(
3479           *this, S, [IL, &S](CodeGenFunction &CGF) {
3480             return CGF.Builder.CreateIsNotNull(
3481                 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3482           });
3483       // Emit final copy of the lastprivate variables if IsLastIter != 0.
3484       if (HasLastprivateClause)
3485         EmitOMPLastprivateClauseFinal(
3486             S, isOpenMPSimdDirective(S.getDirectiveKind()),
3487             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3488       LoopScope.restoreMap();
3489       EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3490         return CGF.Builder.CreateIsNotNull(
3491             CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3492       });
3493     }
3494     DoacrossCleanupScope.ForceCleanup();
3495     // We're now done with the loop, so jump to the continuation block.
3496     if (ContBlock) {
3497       EmitBranch(ContBlock);
3498       EmitBlock(ContBlock, /*IsFinished=*/true);
3499     }
3500   }
3501   return HasLastprivateClause;
3502 }
3503 
3504 /// The following two functions generate expressions for the loop lower
3505 /// and upper bounds in case of static and dynamic (dispatch) schedule
3506 /// of the associated 'for' or 'distribute' loop.
3507 static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S)3508 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3509   const auto &LS = cast<OMPLoopDirective>(S);
3510   LValue LB =
3511       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3512   LValue UB =
3513       EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3514   return {LB, UB};
3515 }
3516 
3517 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3518 /// consider the lower and upper bound expressions generated by the
3519 /// worksharing loop support, but we use 0 and the iteration space size as
3520 /// constants
3521 static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction & CGF,const OMPExecutableDirective & S,Address LB,Address UB)3522 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3523                           Address LB, Address UB) {
3524   const auto &LS = cast<OMPLoopDirective>(S);
3525   const Expr *IVExpr = LS.getIterationVariable();
3526   const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3527   llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3528   llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3529   return {LBVal, UBVal};
3530 }
3531 
/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  // Iteration count widened to size_t; it is the element count of every
  // temporary scan buffer declared below.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  // Collect the reduction variables and their buffer temps from all inscan
  // reduction clauses on the directive.
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variables.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      // Bind the VLA size expression of the buffer's type to the iteration
      // count so the buffer is declared as <type> buffer[num_iters].
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}
3585 
/// Copies final inscan reductions values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  // Iteration count widened to size_t, used to index the last buffer element.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  // Gather the per-variable expressions from all inscan reduction clauses.
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    Privates.append(C->privates().begin(), C->privates().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  // NOTE(review): num_iters - 1 assumes a non-empty iteration space — confirm
  // callers only reach this for loops with at least one iteration.
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      OMPScanNumIterations,
      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    // Rebind the buffer subscript to the computed last-element index.
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
        RValue::get(OMPLast));
    LValue DestLVal = CGF.EmitLValue(OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
    // Copy buffer[num_iters-1] back into the original reduction variable,
    // using the clause's copy operation.
    CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
                    SrcLVal.getAddress(CGF),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
  }
}
3636 
/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
///   buffer[i] op= buffer[i-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  // Iteration count widened to size_t; bounds the logarithmic reduction.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  // Collect per-variable reduction expressions from all inscan clauses.
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k <= ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    // Outer trip count: ceil(log2(num_iters)), computed via the llvm.log2 and
    // llvm.ceil intrinsics on a double, then truncated back to an integer.
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      // Privatize LHS/RHS as buffer[i] and buffer[i - pow2k] so the generic
      // reduction emitter combines the right pair of buffer elements.
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(LHSVD, LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(RHSVD, RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    // --i for the inner loop; continue while i >= pow2k.
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  // In a combined parallel construct the reduction runs on the master thread
  // with a trailing barrier; otherwise emit it inline.
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  // Second pass: the scan phase reads the reduced buffers.
  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
3796 
/// Emits a worksharing loop directive, dispatching to the two-pass scan-based
/// codegen when an 'inscan' reduction modifier is present and to the plain
/// worksharing loop otherwise.
/// \return true if the directive has a 'lastprivate' clause.
static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    // Computes the loop's iteration count; used to size the scan buffers.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    // First pass: input phase, filling the scan buffers.
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    // Second pass: scan phase, reading the reduced buffers; this pass
    // determines the lastprivate result.
    const auto &&SecondGen = [&S, HasCancel,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    // For standalone (non-'parallel'-combined) directives the buffer
    // declarations and the final copy-out are emitted here; presumably the
    // combined forms handle them in the enclosing parallel region's codegen —
    // TODO confirm against the parallel-directive emitters.
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
  } else {
    // Regular (non-inscan) path: a single worksharing loop.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}
3842 
isSupportedByOpenMPIRBuilder(const OMPForDirective & S)3843 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3844   if (S.hasCancel())
3845     return false;
3846   for (OMPClause *C : S.clauses()) {
3847     if (isa<OMPNowaitClause>(C))
3848       continue;
3849 
3850     if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3851       if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3852         return false;
3853       if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3854         return false;
3855       switch (SC->getScheduleKind()) {
3856       case OMPC_SCHEDULE_auto:
3857       case OMPC_SCHEDULE_dynamic:
3858       case OMPC_SCHEDULE_runtime:
3859       case OMPC_SCHEDULE_guided:
3860       case OMPC_SCHEDULE_static:
3861         continue;
3862       case OMPC_SCHEDULE_unknown:
3863         return false;
3864       }
3865     }
3866 
3867     return false;
3868   }
3869 
3870   return true;
3871 }
3872 
3873 static llvm::omp::ScheduleKind
convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind)3874 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3875   switch (ScheduleClauseKind) {
3876   case OMPC_SCHEDULE_unknown:
3877     return llvm::omp::OMP_SCHEDULE_Default;
3878   case OMPC_SCHEDULE_auto:
3879     return llvm::omp::OMP_SCHEDULE_Auto;
3880   case OMPC_SCHEDULE_dynamic:
3881     return llvm::omp::OMP_SCHEDULE_Dynamic;
3882   case OMPC_SCHEDULE_guided:
3883     return llvm::omp::OMP_SCHEDULE_Guided;
3884   case OMPC_SCHEDULE_runtime:
3885     return llvm::omp::OMP_SCHEDULE_Runtime;
3886   case OMPC_SCHEDULE_static:
3887     return llvm::omp::OMP_SCHEDULE_Static;
3888   }
3889   llvm_unreachable("Unhandled schedule kind");
3890 }
3891 
/// Emits code for the '#pragma omp for' directive, using the OpenMPIRBuilder
/// when it is enabled and supports every clause present, and the classic
/// CodeGen path otherwise.
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // 'nowait' removes the implicit barrier at the end of the construct.
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = EmitScalarExpr(ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      // Schedule modifiers and other unsupported clauses were rejected by
      // isSupportedByOpenMPIRBuilder, hence the hard-coded 'false' flags.
      OMPBuilder.applyWorkshareLoop(
          Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false);
      return;
    }

    // Classic path: full clause handling via EmitOMPWorksharingLoop.
    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  // The IRBuilder path already emitted its barrier (when needed) inside
  // applyWorkshareLoop.
  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3946 
EmitOMPForSimdDirective(const OMPForSimdDirective & S)3947 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3948   bool HasLastprivates = false;
3949   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3950                                           PrePostActionTy &) {
3951     HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3952   };
3953   {
3954     auto LPCRegion =
3955         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3956     OMPLexicalScope Scope(*this, S, OMPD_unknown);
3957     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3958   }
3959 
3960   // Emit an implicit barrier at the end.
3961   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3962     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3963   // Check for outer lastprivate conditional update.
3964   checkForLastprivateConditionalUpdate(*this, S);
3965 }
3966 
createSectionLVal(CodeGenFunction & CGF,QualType Ty,const Twine & Name,llvm::Value * Init=nullptr)3967 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3968                                 const Twine &Name,
3969                                 llvm::Value *Init = nullptr) {
3970   LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3971   if (Init)
3972     CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3973   return LVal;
3974 }
3975 
/// Emit the worksharing part of a 'sections' (or 'parallel sections')
/// directive.
///
/// The construct is lowered as a statically-scheduled loop over the section
/// indices: helper variables (lb/ub/stride/is-last/iv) are created, the
/// static-init runtime call partitions the iteration space among threads,
/// and each loop iteration dispatches through a switch on the iteration
/// variable to the corresponding section body.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // CS is null when the associated statement is a single statement rather
  // than a compound of '#pragma omp section' sub-statements.
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound is <number of sections> - 1 (or 0 for a single
    // implicit section).
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Build synthetic AST nodes for 'iv <= ub' and '++iv' so that the
    // generic inner-loop emitter can be reused.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single statement: emit it as the only (implicit) section.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    // Route the static-finish call through the cancel stack so cancellation
    // exits still close the static workshare region.
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
4121 
/// Emit code for '#pragma omp sections'.
///
/// With the OpenMPIRBuilder enabled, one body-generation callback per
/// section is collected and handed to createSections; otherwise the classic
/// EmitSections lowering (static workshare loop + switch) is used, followed
/// by an implicit barrier unless 'nowait' is present.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    // A CompoundStmt holds one '#pragma omp section' sub-statement per
    // child; a non-compound statement forms a single implicit section.
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, SubStmt, AllocaIP, CodeGenIP, "section");
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4190 
EmitOMPSectionDirective(const OMPSectionDirective & S)4191 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4192   if (CGM.getLangOpts().OpenMPIRBuilder) {
4193     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4194     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4195 
4196     const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4197     auto FiniCB = [this](InsertPointTy IP) {
4198       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4199     };
4200 
4201     auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4202                                                    InsertPointTy CodeGenIP) {
4203       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4204           *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
4205     };
4206 
4207     LexicalScope Scope(*this, S.getSourceRange());
4208     EmitStopPoint(&S);
4209     Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4210 
4211     return;
4212   }
4213   LexicalScope Scope(*this, S.getSourceRange());
4214   EmitStopPoint(&S);
4215   EmitStmt(S.getAssociatedStmt());
4216 }
4217 
EmitOMPSingleDirective(const OMPSingleDirective & S)4218 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4219   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4220   llvm::SmallVector<const Expr *, 8> DestExprs;
4221   llvm::SmallVector<const Expr *, 8> SrcExprs;
4222   llvm::SmallVector<const Expr *, 8> AssignmentOps;
4223   // Check if there are any 'copyprivate' clauses associated with this
4224   // 'single' construct.
4225   // Build a list of copyprivate variables along with helper expressions
4226   // (<source>, <destination>, <destination>=<source> expressions)
4227   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4228     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
4229     DestExprs.append(C->destination_exprs().begin(),
4230                      C->destination_exprs().end());
4231     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4232     AssignmentOps.append(C->assignment_ops().begin(),
4233                          C->assignment_ops().end());
4234   }
4235   // Emit code for 'single' region along with 'copyprivate' clauses
4236   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4237     Action.Enter(CGF);
4238     OMPPrivateScope SingleScope(CGF);
4239     (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4240     CGF.EmitOMPPrivateClause(S, SingleScope);
4241     (void)SingleScope.Privatize();
4242     CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4243   };
4244   {
4245     auto LPCRegion =
4246         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4247     OMPLexicalScope Scope(*this, S, OMPD_unknown);
4248     CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4249                                             CopyprivateVars, DestExprs,
4250                                             SrcExprs, AssignmentOps);
4251   }
4252   // Emit an implicit barrier at the end (to avoid data race on firstprivate
4253   // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4254   if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4255     CGM.getOpenMPRuntime().emitBarrierCall(
4256         *this, S.getBeginLoc(),
4257         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4258   }
4259   // Check for outer lastprivate conditional update.
4260   checkForLastprivateConditionalUpdate(*this, S);
4261 }
4262 
emitMaster(CodeGenFunction & CGF,const OMPExecutableDirective & S)4263 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4264   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4265     Action.Enter(CGF);
4266     CGF.EmitStmt(S.getRawStmt());
4267   };
4268   CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4269 }
4270 
EmitOMPMasterDirective(const OMPMasterDirective & S)4271 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4272   if (CGM.getLangOpts().OpenMPIRBuilder) {
4273     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4274     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4275 
4276     const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4277 
4278     auto FiniCB = [this](InsertPointTy IP) {
4279       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4280     };
4281 
4282     auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4283                                                   InsertPointTy CodeGenIP) {
4284       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4285           *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
4286     };
4287 
4288     LexicalScope Scope(*this, S.getSourceRange());
4289     EmitStopPoint(&S);
4290     Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4291 
4292     return;
4293   }
4294   LexicalScope Scope(*this, S.getSourceRange());
4295   EmitStopPoint(&S);
4296   emitMaster(*this, S);
4297 }
4298 
emitMasked(CodeGenFunction & CGF,const OMPExecutableDirective & S)4299 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4300   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4301     Action.Enter(CGF);
4302     CGF.EmitStmt(S.getRawStmt());
4303   };
4304   Expr *Filter = nullptr;
4305   if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4306     Filter = FilterClause->getThreadID();
4307   CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4308                                               Filter);
4309 }
4310 
/// Emit code for '#pragma omp masked'.
///
/// With the OpenMPIRBuilder enabled, the 'filter' clause expression is
/// evaluated up front (it must be emitted before the region callbacks run)
/// and passed to createMasked; otherwise lowering goes through emitMasked.
void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    // Without a 'filter' clause, thread 0 (the primary thread) executes the
    // region.
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMasked(*this, S);
}
4345 
/// Emit code for '#pragma omp critical'.
///
/// The optional 'hint' clause expression is evaluated before the region is
/// emitted; the critical section is identified by the directive's name so
/// that same-named criticals share one lock.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}
4396 
/// Emit code for '#pragma omp parallel for'.
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    // Helper to (re)compute the trip count; needed for inscan reductions,
    // which require global buffers sized by the number of iterations.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     });
    // Inscan declarations must be emitted before the parallel region, and
    // their finalization after it.
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4430 
EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective & S)4431 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4432     const OMPParallelForSimdDirective &S) {
4433   // Emit directive as a combined directive that consists of two implicit
4434   // directives: 'parallel' with 'for' directive.
4435   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4436     Action.Enter(CGF);
4437     emitOMPCopyinClause(CGF, S);
4438     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4439   };
4440   {
4441     const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4442       CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4443       CGCapturedStmtInfo CGSI(CR_OpenMP);
4444       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4445       OMPLoopScope LoopScope(CGF, S);
4446       return CGF.EmitScalarExpr(S.getNumIterations());
4447     };
4448     bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4449                      [](const OMPReductionClause *C) {
4450                        return C->getModifier() == OMPC_REDUCTION_inscan;
4451                      });
4452     if (IsInscan)
4453       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4454     auto LPCRegion =
4455         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4456     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4457                                    emitEmptyBoundParameters);
4458     if (IsInscan)
4459       emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4460   }
4461   // Check for outer lastprivate conditional update.
4462   checkForLastprivateConditionalUpdate(*this, S);
4463 }
4464 
EmitOMPParallelMasterDirective(const OMPParallelMasterDirective & S)4465 void CodeGenFunction::EmitOMPParallelMasterDirective(
4466     const OMPParallelMasterDirective &S) {
4467   // Emit directive as a combined directive that consists of two implicit
4468   // directives: 'parallel' with 'master' directive.
4469   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4470     Action.Enter(CGF);
4471     OMPPrivateScope PrivateScope(CGF);
4472     emitOMPCopyinClause(CGF, S);
4473     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4474     CGF.EmitOMPPrivateClause(S, PrivateScope);
4475     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4476     (void)PrivateScope.Privatize();
4477     emitMaster(CGF, S);
4478     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4479   };
4480   {
4481     auto LPCRegion =
4482         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4483     emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4484                                    emitEmptyBoundParameters);
4485     emitPostUpdateForReductionClause(*this, S,
4486                                      [](CodeGenFunction &) { return nullptr; });
4487   }
4488   // Check for outer lastprivate conditional update.
4489   checkForLastprivateConditionalUpdate(*this, S);
4490 }
4491 
EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective & S)4492 void CodeGenFunction::EmitOMPParallelMaskedDirective(
4493     const OMPParallelMaskedDirective &S) {
4494   // Emit directive as a combined directive that consists of two implicit
4495   // directives: 'parallel' with 'masked' directive.
4496   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4497     Action.Enter(CGF);
4498     OMPPrivateScope PrivateScope(CGF);
4499     emitOMPCopyinClause(CGF, S);
4500     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4501     CGF.EmitOMPPrivateClause(S, PrivateScope);
4502     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4503     (void)PrivateScope.Privatize();
4504     emitMasked(CGF, S);
4505     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4506   };
4507   {
4508     auto LPCRegion =
4509         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4510     emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
4511                                    emitEmptyBoundParameters);
4512     emitPostUpdateForReductionClause(*this, S,
4513                                      [](CodeGenFunction &) { return nullptr; });
4514   }
4515   // Check for outer lastprivate conditional update.
4516   checkForLastprivateConditionalUpdate(*this, S);
4517 }
4518 
EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective & S)4519 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4520     const OMPParallelSectionsDirective &S) {
4521   // Emit directive as a combined directive that consists of two implicit
4522   // directives: 'parallel' with 'sections' directive.
4523   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4524     Action.Enter(CGF);
4525     emitOMPCopyinClause(CGF, S);
4526     CGF.EmitSections(S);
4527   };
4528   {
4529     auto LPCRegion =
4530         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4531     emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4532                                    emitEmptyBoundParameters);
4533   }
4534   // Check for outer lastprivate conditional update.
4535   checkForLastprivateConditionalUpdate(*this, S);
4536 }
4537 
namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  // Local (automatic-storage) variables found while walking the statement.
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  /// Record every variable with local storage declared by \p S.
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars, static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  // Do not descend into nested OpenMP directives, captured regions, lambdas,
  // or blocks: declarations inside them are not part of this task's scope.
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  /// Generic traversal: recurse into all non-null children of \p S.
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of collected local variable declarations.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace
4573 
buildDependences(const OMPExecutableDirective & S,OMPTaskDataTy & Data)4574 static void buildDependences(const OMPExecutableDirective &S,
4575                              OMPTaskDataTy &Data) {
4576 
4577   // First look for 'omp_all_memory' and add this first.
4578   bool OmpAllMemory = false;
4579   if (llvm::any_of(
4580           S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4581             return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4582                    C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4583           })) {
4584     OmpAllMemory = true;
4585     // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4586     // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4587     // simplify.
4588     OMPTaskDataTy::DependData &DD =
4589         Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4590                                       /*IteratorExpr=*/nullptr);
4591     // Add a nullptr Expr to simplify the codegen in emitDependData.
4592     DD.DepExprs.push_back(nullptr);
4593   }
4594   // Add remaining dependences skipping any 'out' or 'inout' if they are
4595   // overridden by 'omp_all_memory'.
4596   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4597     OpenMPDependClauseKind Kind = C->getDependencyKind();
4598     if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4599       continue;
4600     if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4601       continue;
4602     OMPTaskDataTy::DependData &DD =
4603         Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4604     DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4605   }
4606 }
4607 
/// Common codegen for all task-based directives (task, taskloop, ...).
/// Gathers clause data (final, priority, privates, reductions, dependences)
/// into \p Data, emits the outlined task function whose body is produced by
/// \p BodyGen, and finally invokes \p TaskGen to emit the actual task-spawning
/// runtime call.
void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  // Captured-decl parameters: [0] thread id, [1] part id, [4] task descriptor.
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The runtime expects the priority as a 32-bit signed integer.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Record each variable only once even if it appears in several clauses.
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Remember the destination (original) variable for the final copy-back.
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  // Collect reduction variables and their init/combine expressions.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  buildDependences(S, Data);
  // Get list of local vars for untied tasks. Locals of an untied task must be
  // privatized because the task may resume on a different part/thread.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  // Body of the outlined task function: remap all privatized variables to the
  // task-allocated copies, then run BodyGen.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Maps each untied-task local to (pointer slot, actual storage) addresses.
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    // Generate debug info for variables present in shared clause.
    if (auto *DI = CGF.getDebugInfo()) {
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
          CGF.CapturedStmtInfo->getCaptureFields();
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
      if (CaptureFields.size() && ContextValue) {
        unsigned CharWidth = CGF.getContext().getCharWidth();
        // The shared variables are packed together as members of structure.
        // So the address of each shared variable can be computed by adding
        // offset of it (within record) to the base address of record. For each
        // shared variable, debug intrinsic llvm.dbg.declare is generated with
        // appropriate expressions (DIExpression).
        // Ex:
        //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar1,
        //            metadata !DIExpression(DW_OP_deref))
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar2,
        //            metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
          const VarDecl *SharedVar = It->first;
          RecordDecl *CaptureRecord = It->second->getParent();
          const ASTRecordLayout &Layout =
              CGF.getContext().getASTRecordLayout(CaptureRecord);
          unsigned Offset =
              Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
                                                CGF.Builder, false);
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
          // Get the call dbg.declare instruction we just created and update
          // its DIExpression to add offset to base address.
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
            SmallVector<uint64_t, 8> Ops;
            // Add offset to the base address if non zero.
            if (Offset) {
              Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
              Ops.push_back(Offset);
            }
            Ops.push_back(llvm::dwarf::DW_OP_deref);
            auto &Ctx = DDI->getContext();
            llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
            Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
          }
        }
      }
    }
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      // Parameter indices of the privates block and the copy function in the
      // outlined task entry.
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      // For each privatized entity allocate a pointer slot that the copy
      // function fills with the address of the task-private storage.
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If key exists update in place.
        if (Result.second == false)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      // Call the copy function to populate all pointer slots at once.
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      // Map the lastprivate destination variables to the original storage.
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
      // Remap each privatized variable to the address loaded from its slot.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement = Address(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
        if (auto *DI = CGF.getDebugInfo())
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(
                Pair.first, Pair.second.getPointer(), CGF.Builder,
                /*UsePointerValue*/ true);
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        QualType VDType = Pair.first->getType().getNonReferenceType();
        if (Pair.first->getType()->isLValueReferenceType())
          VDType = CGF.getContext().getPointerType(VDType);
        if (isAllocatableDecl(Pair.first)) {
          // Allocatable locals carry one extra level of indirection.
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(
              Ptr,
              CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
              CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
                                CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
                              CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      // Firstprivates must be visible while emitting reduction item sizes, so
      // privatize them in a dedicated scope first.
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first, Replacement);
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      // The reductions descriptor is passed as captured-decl parameter 9.
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        // The runtime returns a void*; cast it to the reduction item's type.
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          // No enclosing taskgroup: let the runtime find the descriptor.
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, std::nullopt,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}
4982 
4983 static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext & C,OMPTaskDataTy & Data,QualType Ty,CapturedDecl * CD,SourceLocation Loc)4984 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4985                                   QualType Ty, CapturedDecl *CD,
4986                                   SourceLocation Loc) {
4987   auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4988                                            ImplicitParamKind::Other);
4989   auto *OrigRef = DeclRefExpr::Create(
4990       C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4991       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4992   auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4993                                               ImplicitParamKind::Other);
4994   auto *PrivateRef = DeclRefExpr::Create(
4995       C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4996       /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4997   QualType ElemType = C.getBaseElementType(Ty);
4998   auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4999                                            ImplicitParamKind::Other);
5000   auto *InitRef = DeclRefExpr::Create(
5001       C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
5002       /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
5003   PrivateVD->setInitStyle(VarDecl::CInit);
5004   PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
5005                                               InitRef, /*BasePath=*/nullptr,
5006                                               VK_PRValue, FPOptionsOverride()));
5007   Data.FirstprivateVars.emplace_back(OrigRef);
5008   Data.FirstprivateCopies.emplace_back(PrivateRef);
5009   Data.FirstprivateInits.emplace_back(InitRef);
5010   return OrigVD;
5011 }
5012 
/// Emits a 'target' region wrapped in an implicit task. The offloading arrays
/// in \p InputInfo (base pointers, pointers, sizes, and optionally mappers)
/// are made implicit firstprivates of the task so that they survive until the
/// (possibly deferred) task actually runs.
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Captured-decl parameters: [0] thread id, [1] part id, [4] task descriptor.
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Collect in_reduction items (processed later via processInReduction).
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  OMPPrivateScope TargetScope(*this);
  // Implicit firstprivates for the offloading arrays: base pointers, pointers,
  // sizes, and (optionally) mappers.
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    // Base pointers, pointers, and mappers all use a void*[N] array type.
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    // Sizes are an int64_t[N] array.
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
    TargetScope.addPrivate(PVD, InputInfo.PointersArray);
    TargetScope.addPrivate(SVD, InputInfo.SizesArray);
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.getPointer())) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD, InputInfo.MappersArray);
    }
  }
  (void)TargetScope.Privatize();
  buildDependences(S, Data);
  // Body of the outlined task: remap the firstprivates to the task-allocated
  // copies and redirect InputInfo to the privatized offloading arrays.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      // Parameter indices of the privates block and the copy function in the
      // outlined task entry.
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      // Call the copy function to populate all pointer slots at once.
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
      }
    }
    CGF.processInReduction(S, Data, CGF, CS, Scope);
    if (InputInfo.NumberOfTargetItems > 0) {
      // Point InputInfo at the first element of each privatized array so the
      // target call inside BodyGen uses the task-local copies.
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    auto *TL = S.getSingleClause<OMPThreadLimitClause>();
    if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
        needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) {
      // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
      // enclosing this target region. This will indirectly set the thread_limit
      // for every applicable construct within target region.
      CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
          CGF, TL->getThreadLimit(), S.getBeginLoc());
    }
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  // The task call's 'if' condition: false (undeferred) unless 'nowait' is
  // present; modeled as a constant integer literal.
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}
5159 
// Privatizes reduction items for a task-based directive: maps each
// task_reduction/in_reduction variable to the thread-specific storage
// returned by the OpenMP runtime so the task body operates on its private
// reduction copy.
//
// \param S     Task-based executable directive being emitted.
// \param Data  Task data; Data.Reductions is non-null when task_reduction
//              state was emitted for this directive.
// \param CGF   Function to emit the privatization code into.
// \param CS    Captured statement of the task; parameter 4 of its captured
//              decl carries the reduction descriptor pointer.
// \param Scope Private scope that receives the reduction-variable mappings.
void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
                                         OMPTaskDataTy &Data,
                                         CodeGenFunction &CGF,
                                         const CapturedStmt *CS,
                                         OMPPrivateScope &Scope) {
  if (Data.Reductions) {
    OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
    OMPLexicalScope LexScope(CGF, S, CapturedRegion);
    ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                           Data.ReductionCopies, Data.ReductionOps);
    // The reduction descriptor is passed to the outlined task function as
    // its fifth captured parameter (index 4).
    llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
        CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
    for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      // Ask the runtime for this thread's private copy of the reduction item.
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      // The runtime hands back a void*; convert it to a pointer to the
      // private copy's type before registering the replacement address.
      Replacement =
          Address(CGF.EmitScalarConversion(
                      Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                      CGF.getContext().getPointerType(
                          Data.ReductionCopies[Cnt]->getType()),
                      Data.ReductionCopies[Cnt]->getExprLoc()),
                  CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
                  Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)Scope.Privatize();
  // Collect the items of all in_reduction clauses together with their
  // private copies, combiner ops and taskgroup descriptors (iterated in
  // lock-step with the variable list).
  SmallVector<const Expr *, 4> InRedVars;
  SmallVector<const Expr *, 4> InRedPrivs;
  SmallVector<const Expr *, 4> InRedOps;
  SmallVector<const Expr *, 4> TaskgroupDescriptors;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ITD = C->taskgroup_descriptors().begin();
    for (const Expr *Ref : C->varlists()) {
      InRedVars.emplace_back(Ref);
      InRedPrivs.emplace_back(*IPriv);
      InRedOps.emplace_back(*IRed);
      TaskgroupDescriptors.emplace_back(*ITD);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ITD, 1);
    }
  }
  OMPPrivateScope InRedScope(CGF);
  if (!InRedVars.empty()) {
    ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
    for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      // If the item carries a taskgroup descriptor expression, load it;
      // otherwise pass a null pointer to the runtime.
      llvm::Value *ReductionsPtr;
      if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
        ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
      } else {
        ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      }
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      // Cast the runtime's void* result to the private copy's type.
      Replacement = Address(
          CGF.EmitScalarConversion(
              Replacement.getPointer(), CGF.getContext().VoidPtrTy,
              CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
              InRedPrivs[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)InRedScope.Privatize();
}
5246 
EmitOMPTaskDirective(const OMPTaskDirective & S)5247 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5248   // Emit outlined function for task construct.
5249   const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5250   Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5251   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5252   const Expr *IfCond = nullptr;
5253   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5254     if (C->getNameModifier() == OMPD_unknown ||
5255         C->getNameModifier() == OMPD_task) {
5256       IfCond = C->getCondition();
5257       break;
5258     }
5259   }
5260 
5261   OMPTaskDataTy Data;
5262   // Check if we should emit tied or untied task.
5263   Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5264   auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5265     CGF.EmitStmt(CS->getCapturedStmt());
5266   };
5267   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5268                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5269                             const OMPTaskDataTy &Data) {
5270     CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5271                                             SharedsTy, CapturedStruct, IfCond,
5272                                             Data);
5273   };
5274   auto LPCRegion =
5275       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
5276   EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5277 }
5278 
/// Emit code for '#pragma omp taskyield': lowers directly to the runtime's
/// taskyield entry point at the directive's location.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}
5283 
EmitOMPErrorDirective(const OMPErrorDirective & S)5284 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5285   const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5286   Expr *ME = MC ? MC->getMessageString() : nullptr;
5287   const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5288   bool IsFatal = false;
5289   if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5290     IsFatal = true;
5291   CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5292 }
5293 
/// Emit code for '#pragma omp barrier': a single explicit-barrier runtime
/// call at the directive's location.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}
5297 
EmitOMPTaskwaitDirective(const OMPTaskwaitDirective & S)5298 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5299   OMPTaskDataTy Data;
5300   // Build list of dependences
5301   buildDependences(S, Data);
5302   Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5303   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5304 }
5305 
isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective & T)5306 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5307   return T.clauses().empty();
5308 }
5309 
/// Emit code for '#pragma omp taskgroup'. Uses the OpenMPIRBuilder when that
/// path is enabled and the directive is supported by it (no clauses);
/// otherwise falls back to the CGOpenMPRuntime path, which also materializes
/// any task_reduction descriptor referenced by the region.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                           AllocaInsertPt->getIterator());

    // Body callback: emit the captured statement at the code-gen insertion
    // point chosen by the builder.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      Builder.restoreIP(CodeGenIP);
      EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };
    // Install a captured-statement info object if none is active, so helpers
    // that expect one keep working while the body is emitted.
    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
    if (!CapturedStmtInfo)
      CapturedStmtInfo = &CapStmtInfo;
    Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
    return;
  }
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // If the region references a task_reduction descriptor variable, gather
    // the reduction items from all task_reduction clauses, initialize the
    // descriptor through the runtime, and store it in that variable.
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
5357 
EmitOMPFlushDirective(const OMPFlushDirective & S)5358 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5359   llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5360                                 ? llvm::AtomicOrdering::NotAtomic
5361                                 : llvm::AtomicOrdering::AcquireRelease;
5362   CGM.getOpenMPRuntime().emitFlush(
5363       *this,
5364       [&S]() -> ArrayRef<const Expr *> {
5365         if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5366           return llvm::ArrayRef(FlushClause->varlist_begin(),
5367                                 FlushClause->varlist_end());
5368         return std::nullopt;
5369       }(),
5370       S.getBeginLoc(), AO);
5371 }
5372 
EmitOMPDepobjDirective(const OMPDepobjDirective & S)5373 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5374   const auto *DO = S.getSingleClause<OMPDepobjClause>();
5375   LValue DOLVal = EmitLValue(DO->getDepobj());
5376   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5377     OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5378                                            DC->getModifier());
5379     Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5380     Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5381         *this, Dependencies, DC->getBeginLoc());
5382     EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
5383     return;
5384   }
5385   if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5386     CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5387     return;
5388   }
5389   if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5390     CGM.getOpenMPRuntime().emitUpdateClause(
5391         *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5392     return;
5393   }
5394 }
5395 
/// Emit code for '#pragma omp scan' appearing inside a loop with an 'inscan'
/// reduction. The directive separates the loop body into an input phase and a
/// scan phase. For plain simd (or simd-only mode) an inline emulation is
/// emitted; otherwise codegen relies on the two-pass scheme driven by the
/// OMPFirstScanLoop / OMPScanDispatch / OMPScanExitBlock /
/// OMPBeforeScanBlock / OMPAfterScanBlock state set up by the parent loop's
/// emission.
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  // Nothing to do when not nested in a loop directive that drives the scan.
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  // Gather the 'inscan' reduction items of the parent directive together with
  // their private copies, combiner ops and copy helpers (parallel arrays).
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    //  }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                      SrcLVal.getAddress(*this),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      // Combine the private copies into the shared values (simple reduction,
      // no runtime synchronization).
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        // Inclusive: RHS = LHS; exclusive: RHS = saved pre-combine value.
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                    SrcLVal.getAddress(*this),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  // General (non-simd-emulation) scheme: the parent loop is emitted twice;
  // OMPFirstScanLoop distinguishes the input-phase pass from the scan-phase
  // pass.
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      // Bind the opaque index of the buffer subscript to the current
      // iteration value.
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      // Exclusive scan reads buffer[i - 1]; skip the copy entirely when
      // i == 0.
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}
5598 
EmitOMPDistributeLoop(const OMPLoopDirective & S,const CodeGenLoopTy & CodeGenLoop,Expr * IncExpr)5599 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5600                                             const CodeGenLoopTy &CodeGenLoop,
5601                                             Expr *IncExpr) {
5602   // Emit the loop iteration variable.
5603   const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5604   const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5605   EmitVarDecl(*IVDecl);
5606 
5607   // Emit the iterations count variable.
5608   // If it is not a variable, Sema decided to calculate iterations count on each
5609   // iteration (e.g., it is foldable into a constant).
5610   if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5611     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5612     // Emit calculation of the iterations count.
5613     EmitIgnoredExpr(S.getCalcLastIteration());
5614   }
5615 
5616   CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5617 
5618   bool HasLastprivateClause = false;
5619   // Check pre-condition.
5620   {
5621     OMPLoopScope PreInitScope(*this, S);
5622     // Skip the entire loop if we don't meet the precondition.
5623     // If the condition constant folds and can be elided, avoid emitting the
5624     // whole loop.
5625     bool CondConstant;
5626     llvm::BasicBlock *ContBlock = nullptr;
5627     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5628       if (!CondConstant)
5629         return;
5630     } else {
5631       llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5632       ContBlock = createBasicBlock("omp.precond.end");
5633       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5634                   getProfileCount(&S));
5635       EmitBlock(ThenBlock);
5636       incrementProfileCounter(&S);
5637     }
5638 
5639     emitAlignedClause(*this, S);
5640     // Emit 'then' code.
5641     {
5642       // Emit helper vars inits.
5643 
5644       LValue LB = EmitOMPHelperVar(
5645           *this, cast<DeclRefExpr>(
5646                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5647                           ? S.getCombinedLowerBoundVariable()
5648                           : S.getLowerBoundVariable())));
5649       LValue UB = EmitOMPHelperVar(
5650           *this, cast<DeclRefExpr>(
5651                      (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5652                           ? S.getCombinedUpperBoundVariable()
5653                           : S.getUpperBoundVariable())));
5654       LValue ST =
5655           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5656       LValue IL =
5657           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5658 
5659       OMPPrivateScope LoopScope(*this);
5660       if (EmitOMPFirstprivateClause(S, LoopScope)) {
5661         // Emit implicit barrier to synchronize threads and avoid data races
5662         // on initialization of firstprivate variables and post-update of
5663         // lastprivate variables.
5664         CGM.getOpenMPRuntime().emitBarrierCall(
5665             *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5666             /*ForceSimpleCall=*/true);
5667       }
5668       EmitOMPPrivateClause(S, LoopScope);
5669       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5670           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5671           !isOpenMPTeamsDirective(S.getDirectiveKind()))
5672         EmitOMPReductionClauseInit(S, LoopScope);
5673       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5674       EmitOMPPrivateLoopCounters(S, LoopScope);
5675       (void)LoopScope.Privatize();
5676       if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5677         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5678 
5679       // Detect the distribute schedule kind and chunk.
5680       llvm::Value *Chunk = nullptr;
5681       OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5682       if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5683         ScheduleKind = C->getDistScheduleKind();
5684         if (const Expr *Ch = C->getChunkSize()) {
5685           Chunk = EmitScalarExpr(Ch);
5686           Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5687                                        S.getIterationVariable()->getType(),
5688                                        S.getBeginLoc());
5689         }
5690       } else {
5691         // Default behaviour for dist_schedule clause.
5692         CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5693             *this, S, ScheduleKind, Chunk);
5694       }
5695       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5696       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5697 
5698       // OpenMP [2.10.8, distribute Construct, Description]
5699       // If dist_schedule is specified, kind must be static. If specified,
5700       // iterations are divided into chunks of size chunk_size, chunks are
5701       // assigned to the teams of the league in a round-robin fashion in the
5702       // order of the team number. When no chunk_size is specified, the
5703       // iteration space is divided into chunks that are approximately equal
5704       // in size, and at most one chunk is distributed to each team of the
5705       // league. The size of the chunks is unspecified in this case.
5706       bool StaticChunked =
5707           RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5708           isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5709       if (RT.isStaticNonchunked(ScheduleKind,
5710                                 /* Chunked */ Chunk != nullptr) ||
5711           StaticChunked) {
5712         CGOpenMPRuntime::StaticRTInput StaticInit(
5713             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
5714             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5715             StaticChunked ? Chunk : nullptr);
5716         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5717                                     StaticInit);
5718         JumpDest LoopExit =
5719             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5720         // UB = min(UB, GlobalUB);
5721         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5722                             ? S.getCombinedEnsureUpperBound()
5723                             : S.getEnsureUpperBound());
5724         // IV = LB;
5725         EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5726                             ? S.getCombinedInit()
5727                             : S.getInit());
5728 
5729         const Expr *Cond =
5730             isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5731                 ? S.getCombinedCond()
5732                 : S.getCond();
5733 
5734         if (StaticChunked)
5735           Cond = S.getCombinedDistCond();
5736 
5737         // For static unchunked schedules generate:
5738         //
5739         //  1. For distribute alone, codegen
5740         //    while (idx <= UB) {
5741         //      BODY;
5742         //      ++idx;
5743         //    }
5744         //
5745         //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
5746         //    while (idx <= UB) {
5747         //      <CodeGen rest of pragma>(LB, UB);
5748         //      idx += ST;
5749         //    }
5750         //
5751         // For static chunk one schedule generate:
5752         //
5753         // while (IV <= GlobalUB) {
5754         //   <CodeGen rest of pragma>(LB, UB);
5755         //   LB += ST;
5756         //   UB += ST;
5757         //   UB = min(UB, GlobalUB);
5758         //   IV = LB;
5759         // }
5760         //
5761         emitCommonSimdLoop(
5762             *this, S,
5763             [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5764               if (isOpenMPSimdDirective(S.getDirectiveKind()))
5765                 CGF.EmitOMPSimdInit(S);
5766             },
5767             [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5768              StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5769               CGF.EmitOMPInnerLoop(
5770                   S, LoopScope.requiresCleanups(), Cond, IncExpr,
5771                   [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5772                     CodeGenLoop(CGF, S, LoopExit);
5773                   },
5774                   [&S, StaticChunked](CodeGenFunction &CGF) {
5775                     if (StaticChunked) {
5776                       CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5777                       CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5778                       CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5779                       CGF.EmitIgnoredExpr(S.getCombinedInit());
5780                     }
5781                   });
5782             });
5783         EmitBlock(LoopExit.getBlock());
5784         // Tell the runtime we are done.
5785         RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5786       } else {
5787         // Emit the outer loop, which requests its work chunk [LB..UB] from
5788         // runtime and runs the inner loop to process it.
5789         const OMPLoopArguments LoopArguments = {
5790             LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5791             IL.getAddress(*this), Chunk};
5792         EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5793                                    CodeGenLoop);
5794       }
5795       if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5796         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5797           return CGF.Builder.CreateIsNotNull(
5798               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5799         });
5800       }
5801       if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5802           !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5803           !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5804         EmitOMPReductionClauseFinal(S, OMPD_simd);
5805         // Emit post-update of the reduction variables if IsLastIter != 0.
5806         emitPostUpdateForReductionClause(
5807             *this, S, [IL, &S](CodeGenFunction &CGF) {
5808               return CGF.Builder.CreateIsNotNull(
5809                   CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5810             });
5811       }
5812       // Emit final copy of the lastprivate variables if IsLastIter != 0.
5813       if (HasLastprivateClause) {
5814         EmitOMPLastprivateClauseFinal(
5815             S, /*NoFinals=*/false,
5816             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5817       }
5818     }
5819 
5820     // We're now done with the loop, so jump to the continuation block.
5821     if (ContBlock) {
5822       EmitBranch(ContBlock);
5823       EmitBlock(ContBlock, true);
5824     }
5825   }
5826 }
5827 
EmitOMPDistributeDirective(const OMPDistributeDirective & S)5828 void CodeGenFunction::EmitOMPDistributeDirective(
5829     const OMPDistributeDirective &S) {
5830   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5831     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5832   };
5833   OMPLexicalScope Scope(*this, S, OMPD_unknown);
5834   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5835 }
5836 
emitOutlinedOrderedFunction(CodeGenModule & CGM,const CapturedStmt * S,SourceLocation Loc)5837 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5838                                                    const CapturedStmt *S,
5839                                                    SourceLocation Loc) {
5840   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5841   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5842   CGF.CapturedStmtInfo = &CapStmtInfo;
5843   llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5844   Fn->setDoesNotRecurse();
5845   return Fn;
5846 }
5847 
5848 template <typename T>
emitRestoreIP(CodeGenFunction & CGF,const T * C,llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,llvm::OpenMPIRBuilder & OMPBuilder)5849 static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
5850                           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5851                           llvm::OpenMPIRBuilder &OMPBuilder) {
5852 
5853   unsigned NumLoops = C->getNumLoops();
5854   QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
5855       /*DestWidth=*/64, /*Signed=*/1);
5856   llvm::SmallVector<llvm::Value *> StoreValues;
5857   for (unsigned I = 0; I < NumLoops; I++) {
5858     const Expr *CounterVal = C->getLoopData(I);
5859     assert(CounterVal);
5860     llvm::Value *StoreValue = CGF.EmitScalarConversion(
5861         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5862         CounterVal->getExprLoc());
5863     StoreValues.emplace_back(StoreValue);
5864   }
5865   OMPDoacrossKind<T> ODK;
5866   bool IsDependSource = ODK.isSource(C);
5867   CGF.Builder.restoreIP(
5868       OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
5869                                      StoreValues, ".cnt.addr", IsDependSource));
5870 }
5871 
/// Emit code for '#pragma omp ordered'. Two lowering paths exist: the
/// OpenMPIRBuilder-based one (when -fopenmp-enable-irbuilder is on) and the
/// classic runtime-call one. In both, a depend/doacross clause turns the
/// directive into doacross synchronization calls with no associated body,
/// while threads/simd (or no clause) emits the body inside an ordered region.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          // With a 'simd' clause the captured body is outlined into its own
          // function and called explicitly.
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          // No 'simd' clause: emit the captured statement inline.
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
        }
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      Builder.restoreIP(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
    }
    return;
  }

  // Classic lowering via CGOpenMPRuntime calls follows.
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      // 'simd' clause: call the outlined captured body.
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
5960 
convertToScalarValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)5961 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5962                                          QualType SrcType, QualType DestType,
5963                                          SourceLocation Loc) {
5964   assert(CGF.hasScalarEvaluationKind(DestType) &&
5965          "DestType must have scalar evaluation kind.");
5966   assert(!Val.isAggregate() && "Must be a scalar or complex.");
5967   return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5968                                                    DestType, Loc)
5969                         : CGF.EmitComplexToScalarConversion(
5970                               Val.getComplexVal(), SrcType, DestType, Loc);
5971 }
5972 
5973 static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction & CGF,RValue Val,QualType SrcType,QualType DestType,SourceLocation Loc)5974 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5975                       QualType DestType, SourceLocation Loc) {
5976   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5977          "DestType must have complex evaluation kind.");
5978   CodeGenFunction::ComplexPairTy ComplexVal;
5979   if (Val.isScalar()) {
5980     // Convert the input element to the element type of the complex.
5981     QualType DestElementType =
5982         DestType->castAs<ComplexType>()->getElementType();
5983     llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5984         Val.getScalarVal(), SrcType, DestElementType, Loc);
5985     ComplexVal = CodeGenFunction::ComplexPairTy(
5986         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5987   } else {
5988     assert(Val.isComplex() && "Must be a scalar or complex.");
5989     QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
5990     QualType DestElementType =
5991         DestType->castAs<ComplexType>()->getElementType();
5992     ComplexVal.first = CGF.EmitScalarConversion(
5993         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
5994     ComplexVal.second = CGF.EmitScalarConversion(
5995         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
5996   }
5997   return ComplexVal;
5998 }
5999 
emitSimpleAtomicStore(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,RValue RVal)6000 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6001                                   LValue LVal, RValue RVal) {
6002   if (LVal.isGlobalReg())
6003     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
6004   else
6005     CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
6006 }
6007 
emitSimpleAtomicLoad(CodeGenFunction & CGF,llvm::AtomicOrdering AO,LValue LVal,SourceLocation Loc)6008 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6009                                    llvm::AtomicOrdering AO, LValue LVal,
6010                                    SourceLocation Loc) {
6011   if (LVal.isGlobalReg())
6012     return CGF.EmitLoadOfLValue(LVal, Loc);
6013   return CGF.EmitAtomicLoad(
6014       LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
6015       LVal.isVolatile());
6016 }
6017 
emitOMPSimpleStore(LValue LVal,RValue RVal,QualType RValTy,SourceLocation Loc)6018 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6019                                          QualType RValTy, SourceLocation Loc) {
6020   switch (getEvaluationKind(LVal.getType())) {
6021   case TEK_Scalar:
6022     EmitStoreThroughLValue(RValue::get(convertToScalarValue(
6023                                *this, RVal, RValTy, LVal.getType(), Loc)),
6024                            LVal);
6025     break;
6026   case TEK_Complex:
6027     EmitStoreOfComplex(
6028         convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
6029         /*isInit=*/false);
6030     break;
6031   case TEK_Aggregate:
6032     llvm_unreachable("Must be a scalar or complex.");
6033   }
6034 }
6035 
/// Emit 'v = x;' for '#pragma omp atomic read': atomically load 'x' with
/// ordering \p AO and store the result into 'v'.
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    // No acquire flush required for these orderings.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  // Store the loaded value into 'v', converting to v's type as needed.
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}
6066 
/// Emit 'x = expr;' for '#pragma omp atomic write': evaluate 'expr' and store
/// it into 'x' atomically with ordering \p AO.
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No release flush required for these orderings.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6093 
/// Try to lower an atomic update of \p X to a single 'atomicrmw' instruction.
///
/// \param X      LValue of the variable being updated.
/// \param Update RValue of the operand combined with 'x'.
/// \param BO     Binary operator combining 'x' and 'update'.
/// \param AO     Atomic ordering for the generated instruction.
/// \param IsXLHSInRHSPart true iff 'x' appears on the LHS inside the RHS of
///        the update expression ('x = x op expr' rather than 'x = expr op x').
/// \returns {true, old value of 'x'} when an atomicrmw was emitted;
///          {false, null RValue} when the caller must fall back to a
///          compare-and-swap based update.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Integer types are always eligible; floating-point types only for add/sub
  // and only when their store size is a power of two.
  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
    return std::make_pair(false, RValue::get(nullptr));

  bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    // 'x = expr - x' cannot be expressed as an atomic subtraction of 'expr'
    // from 'x', so bail out for that operand order.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  // Ordered comparisons map to min/max; which one depends on signedness and
  // on which side 'x' appears.
  case BO_LT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // Operations with no atomicrmw counterpart: caller falls back to cmpxchg.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // Operators that cannot appear in a valid atomic update expression.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    // Constant operands may have a different width; cast them to the element
    // type of 'x' first.
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          IC, X.getAddress(CGF).getElementType(),
          X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                         X.getAddress(CGF).getElementType());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
6212 
EmitOMPAtomicSimpleUpdateExpr(LValue X,RValue E,BinaryOperatorKind BO,bool IsXLHSInRHSPart,llvm::AtomicOrdering AO,SourceLocation Loc,const llvm::function_ref<RValue (RValue)> CommonGen)6213 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6214     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6215     llvm::AtomicOrdering AO, SourceLocation Loc,
6216     const llvm::function_ref<RValue(RValue)> CommonGen) {
6217   // Update expressions are allowed to have the following forms:
6218   // x binop= expr; -> xrval + expr;
6219   // x++, ++x -> xrval + 1;
6220   // x--, --x -> xrval - 1;
6221   // x = x binop expr; -> xrval binop expr
6222   // x = expr Op x; - > expr binop xrval;
6223   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6224   if (!Res.first) {
6225     if (X.isGlobalReg()) {
6226       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6227       // 'xrval'.
6228       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6229     } else {
6230       // Perform compare-and-swap procedure.
6231       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6232     }
6233   }
6234   return Res;
6235 }
6236 
/// Emit 'x = x binop expr;' (or inc/dec forms) for '#pragma omp atomic
/// update'.
///
/// \param UE The pre-built update expression from Sema, containing opaque
///        value placeholders for 'x' and 'expr'.
/// \param IsXLHSInRHSPart true iff 'x' is the LHS operand inside UE's RHS.
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  // Identify which opaque placeholder in UE stands for 'x' and which for
  // 'expr', based on the operand order.
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Given the current value of 'x', compute the updated value by evaluating
  // UE with the placeholders bound to the concrete values.
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No release flush required for these orderings.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6284 
convertToType(CodeGenFunction & CGF,RValue Value,QualType SourceType,QualType ResType,SourceLocation Loc)6285 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6286                             QualType SourceType, QualType ResType,
6287                             SourceLocation Loc) {
6288   switch (CGF.getEvaluationKind(ResType)) {
6289   case TEK_Scalar:
6290     return RValue::get(
6291         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6292   case TEK_Complex: {
6293     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6294     return RValue::getComplex(Res.first, Res.second);
6295   }
6296   case TEK_Aggregate:
6297     break;
6298   }
6299   llvm_unreachable("Must be a scalar or complex.");
6300 }
6301 
/// Emit '#pragma omp atomic capture': atomically update 'x' (or plainly
/// overwrite it when \p UE is null) and capture either its old or new value
/// into 'v'.
///
/// \param IsPostfixUpdate true when 'v' receives the value of 'x' from before
///        the update (postfix form), false when it receives the new value.
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Compute the updated value of 'x' and record what 'v' should receive
    // (old value for postfix capture, new value otherwise).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // 'v' receives the old value of 'x'; the store itself writes 'expr'.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry to
    // the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the atomic
    // operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}
6408 
/// Emit IR for an '#pragma omp atomic compare' construct (with optional
/// capture) by lowering it to OpenMPIRBuilder::createAtomicCompare.
///
/// \param AO      Memory ordering of the atomic operation.
/// \param FailAO  Memory ordering for the failure path of the
///                compare-exchange; NotAtomic when no 'fail' clause was given.
/// \param X       The atomically updated lvalue 'x'.
/// \param V       Destination of the captured value, or null if no capture.
/// \param R       Destination of the comparison result, or null.
/// \param E       The expression 'x' is compared against (or min/max operand).
/// \param D       The replacement value for the compare-exchange form, or null.
/// \param CE      The condition; must be a BinaryOperator with ==, < or >.
/// \param IsXBinopExpr    True when 'x' is the LHS of the binary condition.
/// \param IsPostfixUpdate True when 'v' captures the value of 'x' from before
///                        the atomic operation.
/// \param IsFailOnly      True for the form that only captures the old value
///                        when the comparison fails.
static void emitOMPAtomicCompareExpr(
    CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
    const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
    const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
    SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  // Map the comparison operator to the builder's atomic compare kind:
  // '==' selects compare-exchange; '<' / '>' select atomic min / max.
  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(X);
  Address XAddr = XLVal.getAddress(CGF);

  // Evaluate E (or D), converting the result to X's type when the written
  // expression differs from X's type only through implicit casts.
  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  // Constant operands may still carry a different integer width than 'x';
  // cast them to x's storage type so the atomic instruction type-checks.
  if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
    EVal = CGF.Builder.CreateIntCast(
        CI, XLVal.getAddress(CGF).getElementType(),
        E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
      DVal = CGF.Builder.CreateIntCast(
          CI, XLVal.getAddress(CGF).getElementType(),
          D->getType()->hasSignedIntegerRepresentation());

  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      XAddr.getPointer(), XAddr.getElementType(),
      X->getType()->hasSignedIntegerRepresentation(),
      X->getType().isVolatileQualified()};
  // V and R are optional; a default-constructed AtomicOpValue is passed when
  // the corresponding clause operand is absent.
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(V);
    Address Addr = LV.getAddress(CGF);
    VOpVal = {Addr.getPointer(), Addr.getElementType(),
              V->getType()->hasSignedIntegerRepresentation(),
              V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(R);
    Address Addr = LV.getAddress(CGF);
    ROpVal = {Addr.getPointer(), Addr.getElementType(),
              R->getType()->hasSignedIntegerRepresentation(),
              R->getType().isVolatileQualified()};
  }

  if (FailAO == llvm::AtomicOrdering::NotAtomic) {
    // fail clause was not mentioned on the
    // "#pragma omp atomic compare" construct.
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly));
  } else
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly, FailAO));
}
6489 
emitOMPAtomicExpr(CodeGenFunction & CGF,OpenMPClauseKind Kind,llvm::AtomicOrdering AO,llvm::AtomicOrdering FailAO,bool IsPostfixUpdate,const Expr * X,const Expr * V,const Expr * R,const Expr * E,const Expr * UE,const Expr * D,const Expr * CE,bool IsXLHSInRHSPart,bool IsFailOnly,SourceLocation Loc)6490 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6491                               llvm::AtomicOrdering AO,
6492                               llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6493                               const Expr *X, const Expr *V, const Expr *R,
6494                               const Expr *E, const Expr *UE, const Expr *D,
6495                               const Expr *CE, bool IsXLHSInRHSPart,
6496                               bool IsFailOnly, SourceLocation Loc) {
6497   switch (Kind) {
6498   case OMPC_read:
6499     emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6500     break;
6501   case OMPC_write:
6502     emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6503     break;
6504   case OMPC_unknown:
6505   case OMPC_update:
6506     emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6507     break;
6508   case OMPC_capture:
6509     emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6510                              IsXLHSInRHSPart, Loc);
6511     break;
6512   case OMPC_compare: {
6513     emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6514                              IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6515     break;
6516   }
6517   default:
6518     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6519   }
6520 }
6521 
/// Emit an '#pragma omp atomic' directive: pick the memory ordering from the
/// ordering clauses (or the runtime's default when none is present), identify
/// the atomic form (read/write/update/capture/compare), resolve a separate
/// failure ordering for 'compare' with a 'fail' clause, and dispatch to
/// emitOMPAtomicExpr.
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  // Fail Memory Clause Ordering.
  llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // nullptr.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    // No ordering clause: use the runtime's default memory ordering. An
    // acq_rel default is only kept for 'capture'; for write/update forms it
    // weakens to release and for 'read' to acquire.
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  // 'compare' combined with a 'fail' clause carries an explicit ordering for
  // the failure path of the compare-exchange.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_fail)) {
    Kind = OMPC_compare;
    const auto *FailClause = S.getSingleClause<OMPFailClause>();
    if (FailClause) {
      OpenMPClauseKind FailParameter = FailClause->getFailParameter();
      if (FailParameter == llvm::omp::OMPC_relaxed)
        FailAO = llvm::AtomicOrdering::Monotonic;
      else if (FailParameter == llvm::omp::OMPC_acquire)
        FailAO = llvm::AtomicOrdering::Acquire;
      else if (FailParameter == llvm::omp::OMPC_seq_cst)
        FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
                    S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
                    S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
                    S.isFailOnly(), S.getBeginLoc());
}
6601 
emitCommonOMPTargetDirective(CodeGenFunction & CGF,const OMPExecutableDirective & S,const RegionCodeGenTy & CodeGen)6602 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6603                                          const OMPExecutableDirective &S,
6604                                          const RegionCodeGenTy &CodeGen) {
6605   assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6606   CodeGenModule &CGM = CGF.CGM;
6607 
6608   // On device emit this construct as inlined code.
6609   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6610     OMPLexicalScope Scope(CGF, S, OMPD_target);
6611     CGM.getOpenMPRuntime().emitInlinedDirective(
6612         CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6613           CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6614         });
6615     return;
6616   }
6617 
6618   auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6619   llvm::Function *Fn = nullptr;
6620   llvm::Constant *FnID = nullptr;
6621 
6622   const Expr *IfCond = nullptr;
6623   // Check for the at most one if clause associated with the target region.
6624   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6625     if (C->getNameModifier() == OMPD_unknown ||
6626         C->getNameModifier() == OMPD_target) {
6627       IfCond = C->getCondition();
6628       break;
6629     }
6630   }
6631 
6632   // Check if we have any device clause associated with the directive.
6633   llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6634       nullptr, OMPC_DEVICE_unknown);
6635   if (auto *C = S.getSingleClause<OMPDeviceClause>())
6636     Device.setPointerAndInt(C->getDevice(), C->getModifier());
6637 
6638   // Check if we have an if clause whose conditional always evaluates to false
6639   // or if we do not have any targets specified. If so the target region is not
6640   // an offload entry point.
6641   bool IsOffloadEntry = true;
6642   if (IfCond) {
6643     bool Val;
6644     if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
6645       IsOffloadEntry = false;
6646   }
6647   if (CGM.getLangOpts().OMPTargetTriples.empty())
6648     IsOffloadEntry = false;
6649 
6650   if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6651     unsigned DiagID = CGM.getDiags().getCustomDiagID(
6652         DiagnosticsEngine::Error,
6653         "No offloading entry generated while offloading is mandatory.");
6654     CGM.getDiags().Report(DiagID);
6655   }
6656 
6657   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6658   StringRef ParentName;
6659   // In case we have Ctors/Dtors we use the complete type variant to produce
6660   // the mangling of the device outlined kernel.
6661   if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
6662     ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
6663   else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
6664     ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
6665   else
6666     ParentName =
6667         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
6668 
6669   // Emit target region as a standalone region.
6670   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
6671                                                     IsOffloadEntry, CodeGen);
6672   OMPLexicalScope Scope(CGF, S, OMPD_task);
6673   auto &&SizeEmitter =
6674       [IsOffloadEntry](CodeGenFunction &CGF,
6675                        const OMPLoopDirective &D) -> llvm::Value * {
6676     if (IsOffloadEntry) {
6677       OMPLoopScope(CGF, D);
6678       // Emit calculation of the iterations count.
6679       llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
6680       NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
6681                                                 /*isSigned=*/false);
6682       return NumIterations;
6683     }
6684     return nullptr;
6685   };
6686   CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
6687                                         SizeEmitter);
6688 }
6689 
/// Emit the body of a 'target' region: privatize firstprivate/private
/// variables, adjust lambda-related data for the device when this is a target
/// execution directive, then emit the captured statement.
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  // Privatize before emitting the body so references inside the region bind
  // to the private copies.
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}
6703 
EmitOMPTargetDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetDirective & S)6704 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6705                                                   StringRef ParentName,
6706                                                   const OMPTargetDirective &S) {
6707   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6708     emitTargetRegion(CGF, S, Action);
6709   };
6710   llvm::Function *Fn;
6711   llvm::Constant *Addr;
6712   // Emit target region as a standalone region.
6713   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6714       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6715   assert(Fn && Addr && "Target device function emission failed.");
6716 }
6717 
EmitOMPTargetDirective(const OMPTargetDirective & S)6718 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6719   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6720     emitTargetRegion(CGF, S, Action);
6721   };
6722   emitCommonOMPTargetDirective(*this, S, CodeGen);
6723 }
6724 
/// Common codegen for 'teams'-based directives: outline the teams region,
/// emit the num_teams/thread_limit runtime call when either clause is
/// present, then emit the runtime teams call with the captured variables.
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    // A null expression means the corresponding clause was not specified.
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
6751 
/// Emit '#pragma omp teams': privatize firstprivate/private/reduction
/// variables, emit the captured body, and finalize/post-update reductions.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6768 
/// Emit the 'teams' part of a combined '#pragma omp target teams' directive
/// as a standalone teams region with privatization and reductions.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // Adjust lambda-related data for device codegen on target execution
    // directives.
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6790 
EmitOMPTargetTeamsDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDirective & S)6791 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6792     CodeGenModule &CGM, StringRef ParentName,
6793     const OMPTargetTeamsDirective &S) {
6794   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6795     emitTargetTeamsRegion(CGF, Action, S);
6796   };
6797   llvm::Function *Fn;
6798   llvm::Constant *Addr;
6799   // Emit target region as a standalone region.
6800   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6801       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6802   assert(Fn && Addr && "Target device function emission failed.");
6803 }
6804 
EmitOMPTargetTeamsDirective(const OMPTargetTeamsDirective & S)6805 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6806     const OMPTargetTeamsDirective &S) {
6807   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6808     emitTargetTeamsRegion(CGF, Action, S);
6809   };
6810   emitCommonOMPTargetDirective(*this, S, CodeGen);
6811 }
6812 
/// Emit the 'teams distribute' part of a combined
/// '#pragma omp target teams distribute' directive.
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  // Codegen for the inner 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6836 
EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeDirective & S)6837 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6838     CodeGenModule &CGM, StringRef ParentName,
6839     const OMPTargetTeamsDistributeDirective &S) {
6840   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6841     emitTargetTeamsDistributeRegion(CGF, Action, S);
6842   };
6843   llvm::Function *Fn;
6844   llvm::Constant *Addr;
6845   // Emit target region as a standalone region.
6846   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6847       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6848   assert(Fn && Addr && "Target device function emission failed.");
6849 }
6850 
EmitOMPTargetTeamsDistributeDirective(const OMPTargetTeamsDistributeDirective & S)6851 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6852     const OMPTargetTeamsDistributeDirective &S) {
6853   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6854     emitTargetTeamsDistributeRegion(CGF, Action, S);
6855   };
6856   emitCommonOMPTargetDirective(*this, S, CodeGen);
6857 }
6858 
/// Emit the 'teams distribute simd' part of a combined
/// '#pragma omp target teams distribute simd' directive.
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  // Codegen for the inner 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6882 
EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeSimdDirective & S)6883 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6884     CodeGenModule &CGM, StringRef ParentName,
6885     const OMPTargetTeamsDistributeSimdDirective &S) {
6886   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6887     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6888   };
6889   llvm::Function *Fn;
6890   llvm::Constant *Addr;
6891   // Emit target region as a standalone region.
6892   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6893       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6894   assert(Fn && Addr && "Target device function emission failed.");
6895 }
6896 
EmitOMPTargetTeamsDistributeSimdDirective(const OMPTargetTeamsDistributeSimdDirective & S)6897 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6898     const OMPTargetTeamsDistributeSimdDirective &S) {
6899   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6900     emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6901   };
6902   emitCommonOMPTargetDirective(*this, S, CodeGen);
6903 }
6904 
/// Emit '#pragma omp teams distribute': a teams region whose body is an
/// inlined 'distribute' loop, with reduction handling.
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  // Codegen for the inner 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6927 
/// Emit '#pragma omp teams distribute simd': a teams region whose body is a
/// distribute loop inlined as a 'simd' directive.
void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  // Codegen for the inner 'distribute' loop.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6949 
/// Emit '#pragma omp teams distribute parallel for': a teams region whose
/// body is a distribute loop with an inner combined 'parallel for'.
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  // Codegen for the 'distribute' loop; the inner 'parallel for' is emitted
  // by emitInnerParallelForWhenCombined using the distribute increment.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6972 
/// Emit '#pragma omp teams distribute parallel for simd': a teams region
/// whose body is a distribute loop with an inner combined 'parallel for'
/// (simd variant).
void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  // Codegen for the 'distribute' loop; the inner 'parallel for' is emitted
  // by emitInnerParallelForWhenCombined using the distribute increment.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  // Apply any reduction post-update expressions after the teams region.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
6996 
/// Emit '#pragma omp interop': evaluate the device and depend clauses, then
/// lower the init/destroy/use clause to the corresponding OpenMPIRBuilder
/// runtime call.
void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  // Build list and emit dependences
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.getPointer();
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  // 'nowait' must accompany an init, destroy, or use clause.
  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective.");

  if (const auto *C = S.getSingleClause<OMPInitClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    // The interop-type is either 'target' or 'targetsync'.
    llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
    if (C->getIsTarget()) {
      InteropType = llvm::omp::OMPInteropType::Target;
    } else {
      assert(C->getIsTargetSync() && "Expected interop-type target/targetsync");
      InteropType = llvm::omp::OMPInteropType::TargetSync;
    }
    OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
                                    NumDependences, DependenceList,
                                    Data.HasNowaitClause);
  } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                       NumDependences, DependenceList,
                                       Data.HasNowaitClause);
  } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                   NumDependences, DependenceList,
                                   Data.HasNowaitClause);
  }
}
7050 
/// Emit the body of a combined 'target teams distribute parallel for'
/// directive: a teams region whose body is an inlined 'distribute' loop that
/// in turn emits the inner 'parallel for' when combined.
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // The 'distribute' loop; the combined inner 'parallel for' is produced by
  // emitInnerParallelForWhenCombined using the distribute increment.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Reductions must be privatized before the distribute loop is emitted and
    // finalized afterwards.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  // No extra post-update expression is needed beyond the clause itself.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7077 
EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForDirective & S)7078 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7079     CodeGenModule &CGM, StringRef ParentName,
7080     const OMPTargetTeamsDistributeParallelForDirective &S) {
7081   // Emit SPMD target teams distribute parallel for region as a standalone
7082   // region.
7083   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7084     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7085   };
7086   llvm::Function *Fn;
7087   llvm::Constant *Addr;
7088   // Emit target region as a standalone region.
7089   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7090       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7091   assert(Fn && Addr && "Target device function emission failed.");
7092 }
7093 
EmitOMPTargetTeamsDistributeParallelForDirective(const OMPTargetTeamsDistributeParallelForDirective & S)7094 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7095     const OMPTargetTeamsDistributeParallelForDirective &S) {
7096   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7097     emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7098   };
7099   emitCommonOMPTargetDirective(*this, S, CodeGen);
7100 }
7101 
/// Emit the body of a combined 'target teams distribute parallel for simd'
/// directive: a teams region whose body is an inlined 'distribute' loop that
/// emits the inner 'parallel for' when combined.
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // The 'distribute' loop; the combined inner loop is produced by
  // emitInnerParallelForWhenCombined using the distribute increment.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize reductions before emitting the loop; finalize afterwards.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  // No extra post-update expression is needed beyond the clause itself.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7129 
EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsDistributeParallelForSimdDirective & S)7130 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7131     CodeGenModule &CGM, StringRef ParentName,
7132     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7133   // Emit SPMD target teams distribute parallel for simd region as a standalone
7134   // region.
7135   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7136     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7137   };
7138   llvm::Function *Fn;
7139   llvm::Constant *Addr;
7140   // Emit target region as a standalone region.
7141   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7142       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7143   assert(Fn && Addr && "Target device function emission failed.");
7144 }
7145 
EmitOMPTargetTeamsDistributeParallelForSimdDirective(const OMPTargetTeamsDistributeParallelForSimdDirective & S)7146 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7147     const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7148   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7149     emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7150   };
7151   emitCommonOMPTargetDirective(*this, S, CodeGen);
7152 }
7153 
EmitOMPCancellationPointDirective(const OMPCancellationPointDirective & S)7154 void CodeGenFunction::EmitOMPCancellationPointDirective(
7155     const OMPCancellationPointDirective &S) {
7156   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7157                                                    S.getCancelRegion());
7158 }
7159 
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  // Pick the 'if' clause that applies to 'cancel': either unmodified or with
  // the 'cancel' directive-name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      // Let the builder emit the cancellation; it returns the updated insert
      // point, which we restore on our IRBuilder.
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  // Fallback path: emit the cancel through the clang OpenMP runtime.
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}
7188 
/// Return the jump destination a cancelled region of kind \p Kind branches
/// to.
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  // For parallel- and task-based regions, cancellation leaves the outlined
  // function, so branch to its return block.
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  // Otherwise only worksharing kinds are expected; they branch to the exit
  // block tracked by the innermost OMPCancelStack entry.
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
7203 
/// Privatize the variables of a 'use_device_ptr' clause so references inside
/// the region use the device addresses recorded in \p CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // Each declaration is processed only once even if listed multiple times.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *OrigVarIt : C.varlists()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
    if (!Processed.insert(OrigVD).second)
      continue;

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    // Return the address of the private variable.
    // NOTE(review): the address is given the alignment of 'void *', not of
    // the variable's own type — presumably because the runtime passed the
    // value as a generic pointer; confirm before changing.
    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD,
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}
7245 
getBaseDecl(const Expr * Ref)7246 static const VarDecl *getBaseDecl(const Expr *Ref) {
7247   const Expr *Base = Ref->IgnoreParenImpCasts();
7248   while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
7249     Base = OASE->getBase()->IgnoreParenImpCasts();
7250   while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7251     Base = ASE->getBase()->IgnoreParenImpCasts();
7252   return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7253 }
7254 
/// Privatize the variables of a 'use_device_addr' clause so references inside
/// the region use the device addresses recorded in \p CaptureDeviceAddrMap.
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // Each base declaration is processed only once even if listed repeatedly.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    // List items may be array sections/subscripts; strip them to the base.
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    // Use the captured device value with the alignment of a generic pointer.
    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy));
    // For declrefs and variable length array need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType()) {
      QualType PtrTy = getContext().getPointerType(
          OrigVD->getType().getNonReferenceType());
      PrivAddr =
          EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
                            PtrTy->castAs<PointerType>());
    }

    (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
  }
}
7302 
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      // Flip the flag so PrivCodeGen below takes the privatization path.
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // The directive's body, emitted with no extra wrapping.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = cast<DeclRefExpr>(E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = getBaseDecl(E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privative them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
7415 
EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective & S)7416 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7417     const OMPTargetEnterDataDirective &S) {
7418   // If we don't have target devices, don't bother emitting the data mapping
7419   // code.
7420   if (CGM.getLangOpts().OMPTargetTriples.empty())
7421     return;
7422 
7423   // Check if we have any if clause associated with the directive.
7424   const Expr *IfCond = nullptr;
7425   if (const auto *C = S.getSingleClause<OMPIfClause>())
7426     IfCond = C->getCondition();
7427 
7428   // Check if we have any device clause associated with the directive.
7429   const Expr *Device = nullptr;
7430   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7431     Device = C->getDevice();
7432 
7433   OMPLexicalScope Scope(*this, S, OMPD_task);
7434   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7435 }
7436 
EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective & S)7437 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7438     const OMPTargetExitDataDirective &S) {
7439   // If we don't have target devices, don't bother emitting the data mapping
7440   // code.
7441   if (CGM.getLangOpts().OMPTargetTriples.empty())
7442     return;
7443 
7444   // Check if we have any if clause associated with the directive.
7445   const Expr *IfCond = nullptr;
7446   if (const auto *C = S.getSingleClause<OMPIfClause>())
7447     IfCond = C->getCondition();
7448 
7449   // Check if we have any device clause associated with the directive.
7450   const Expr *Device = nullptr;
7451   if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7452     Device = C->getDevice();
7453 
7454   OMPLexicalScope Scope(*this, S, OMPD_task);
7455   CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7456 }
7457 
/// Emit the body of a combined 'target parallel' directive as a 'parallel'
/// region over the captured statement.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize firstprivate, private and reduction variables before the
    // body, then finalize reductions afterwards.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  // No extra post-update expression is needed beyond the clause itself.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7482 
EmitOMPTargetParallelDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelDirective & S)7483 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7484     CodeGenModule &CGM, StringRef ParentName,
7485     const OMPTargetParallelDirective &S) {
7486   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7487     emitTargetParallelRegion(CGF, S, Action);
7488   };
7489   llvm::Function *Fn;
7490   llvm::Constant *Addr;
7491   // Emit target region as a standalone region.
7492   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7493       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7494   assert(Fn && Addr && "Target device function emission failed.");
7495 }
7496 
EmitOMPTargetParallelDirective(const OMPTargetParallelDirective & S)7497 void CodeGenFunction::EmitOMPTargetParallelDirective(
7498     const OMPTargetParallelDirective &S) {
7499   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7500     emitTargetParallelRegion(CGF, S, Action);
7501   };
7502   emitCommonOMPTargetDirective(*this, S, CodeGen);
7503 }
7504 
/// Emit the body of a combined 'target parallel for' directive.
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Track cancellation for this region so 'cancel' can branch to its exit.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
7521 
EmitOMPTargetParallelForDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForDirective & S)7522 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7523     CodeGenModule &CGM, StringRef ParentName,
7524     const OMPTargetParallelForDirective &S) {
7525   // Emit SPMD target parallel for region as a standalone region.
7526   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7527     emitTargetParallelForRegion(CGF, S, Action);
7528   };
7529   llvm::Function *Fn;
7530   llvm::Constant *Addr;
7531   // Emit target region as a standalone region.
7532   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7533       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7534   assert(Fn && Addr && "Target device function emission failed.");
7535 }
7536 
EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective & S)7537 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7538     const OMPTargetParallelForDirective &S) {
7539   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7540     emitTargetParallelForRegion(CGF, S, Action);
7541   };
7542   emitCommonOMPTargetDirective(*this, S, CodeGen);
7543 }
7544 
/// Emit the body of a combined 'target parallel for simd' directive.
static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  // Note: the inner kind passed here is OMPD_simd (not OMPD_for) for the simd
  // variant of the combined construct.
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
7560 
EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelForSimdDirective & S)7561 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7562     CodeGenModule &CGM, StringRef ParentName,
7563     const OMPTargetParallelForSimdDirective &S) {
7564   // Emit SPMD target parallel for region as a standalone region.
7565   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7566     emitTargetParallelForSimdRegion(CGF, S, Action);
7567   };
7568   llvm::Function *Fn;
7569   llvm::Constant *Addr;
7570   // Emit target region as a standalone region.
7571   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7572       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7573   assert(Fn && Addr && "Target device function emission failed.");
7574 }
7575 
EmitOMPTargetParallelForSimdDirective(const OMPTargetParallelForSimdDirective & S)7576 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7577     const OMPTargetParallelForSimdDirective &S) {
7578   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7579     emitTargetParallelForSimdRegion(CGF, S, Action);
7580   };
7581   emitCommonOMPTargetDirective(*this, S, CodeGen);
7582 }
7583 
7584 /// Emit a helper variable and return corresponding lvalue.
mapParam(CodeGenFunction & CGF,const DeclRefExpr * Helper,const ImplicitParamDecl * PVD,CodeGenFunction::OMPPrivateScope & Privates)7585 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7586                      const ImplicitParamDecl *PVD,
7587                      CodeGenFunction::OMPPrivateScope &Privates) {
7588   const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7589   Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7590 }
7591 
EmitOMPTaskLoopBasedDirective(const OMPLoopDirective & S)7592 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7593   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7594   // Emit outlined function for task construct.
7595   const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7596   Address CapturedStruct = Address::invalid();
7597   {
7598     OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7599     CapturedStruct = GenerateCapturedStmtArgument(*CS);
7600   }
7601   QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7602   const Expr *IfCond = nullptr;
7603   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7604     if (C->getNameModifier() == OMPD_unknown ||
7605         C->getNameModifier() == OMPD_taskloop) {
7606       IfCond = C->getCondition();
7607       break;
7608     }
7609   }
7610 
7611   OMPTaskDataTy Data;
7612   // Check if taskloop must be emitted without taskgroup.
7613   Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7614   // TODO: Check if we should emit tied or untied task.
7615   Data.Tied = true;
7616   // Set scheduling for taskloop
7617   if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7618     // grainsize clause
7619     Data.Schedule.setInt(/*IntVal=*/false);
7620     Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7621   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7622     // num_tasks clause
7623     Data.Schedule.setInt(/*IntVal=*/true);
7624     Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7625   }
7626 
7627   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7628     // if (PreCond) {
7629     //   for (IV in 0..LastIteration) BODY;
7630     //   <Final counter/linear vars updates>;
7631     // }
7632     //
7633 
7634     // Emit: if (PreCond) - begin.
7635     // If the condition constant folds and can be elided, avoid emitting the
7636     // whole loop.
7637     bool CondConstant;
7638     llvm::BasicBlock *ContBlock = nullptr;
7639     OMPLoopScope PreInitScope(CGF, S);
7640     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7641       if (!CondConstant)
7642         return;
7643     } else {
7644       llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
7645       ContBlock = CGF.createBasicBlock("taskloop.if.end");
7646       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
7647                   CGF.getProfileCount(&S));
7648       CGF.EmitBlock(ThenBlock);
7649       CGF.incrementProfileCounter(&S);
7650     }
7651 
7652     (void)CGF.EmitOMPLinearClauseInit(S);
7653 
7654     OMPPrivateScope LoopScope(CGF);
7655     // Emit helper vars inits.
7656     enum { LowerBound = 5, UpperBound, Stride, LastIter };
7657     auto *I = CS->getCapturedDecl()->param_begin();
7658     auto *LBP = std::next(I, LowerBound);
7659     auto *UBP = std::next(I, UpperBound);
7660     auto *STP = std::next(I, Stride);
7661     auto *LIP = std::next(I, LastIter);
7662     mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
7663              LoopScope);
7664     mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
7665              LoopScope);
7666     mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
7667     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
7668              LoopScope);
7669     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7670     CGF.EmitOMPLinearClause(S, LoopScope);
7671     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7672     (void)LoopScope.Privatize();
7673     // Emit the loop iteration variable.
7674     const Expr *IVExpr = S.getIterationVariable();
7675     const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
7676     CGF.EmitVarDecl(*IVDecl);
7677     CGF.EmitIgnoredExpr(S.getInit());
7678 
7679     // Emit the iterations count variable.
7680     // If it is not a variable, Sema decided to calculate iterations count on
7681     // each iteration (e.g., it is foldable into a constant).
7682     if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
7683       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
7684       // Emit calculation of the iterations count.
7685       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
7686     }
7687 
7688     {
7689       OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7690       emitCommonSimdLoop(
7691           CGF, S,
7692           [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7693             if (isOpenMPSimdDirective(S.getDirectiveKind()))
7694               CGF.EmitOMPSimdInit(S);
7695           },
7696           [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
7697             CGF.EmitOMPInnerLoop(
7698                 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
7699                 [&S](CodeGenFunction &CGF) {
7700                   emitOMPLoopBodyWithStopPoint(CGF, S,
7701                                                CodeGenFunction::JumpDest());
7702                 },
7703                 [](CodeGenFunction &) {});
7704           });
7705     }
7706     // Emit: if (PreCond) - end.
7707     if (ContBlock) {
7708       CGF.EmitBranch(ContBlock);
7709       CGF.EmitBlock(ContBlock, true);
7710     }
7711     // Emit final copy of the lastprivate variables if IsLastIter != 0.
7712     if (HasLastprivateClause) {
7713       CGF.EmitOMPLastprivateClauseFinal(
7714           S, isOpenMPSimdDirective(S.getDirectiveKind()),
7715           CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
7716               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7717               (*LIP)->getType(), S.getBeginLoc())));
7718     }
7719     LoopScope.restoreMap();
7720     CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
7721       return CGF.Builder.CreateIsNotNull(
7722           CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7723                                (*LIP)->getType(), S.getBeginLoc()));
7724     });
7725   };
7726   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
7727                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
7728                             const OMPTaskDataTy &Data) {
7729     auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
7730                       &Data](CodeGenFunction &CGF, PrePostActionTy &) {
7731       OMPLoopScope PreInitScope(CGF, S);
7732       CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
7733                                                   OutlinedFn, SharedsTy,
7734                                                   CapturedStruct, IfCond, Data);
7735     };
7736     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
7737                                                     CodeGen);
7738   };
7739   if (Data.Nogroup) {
7740     EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
7741   } else {
7742     CGM.getOpenMPRuntime().emitTaskgroupRegion(
7743         *this,
7744         [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
7745                                         PrePostActionTy &Action) {
7746           Action.Enter(CGF);
7747           CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
7748                                         Data);
7749         },
7750         S.getBeginLoc());
7751   }
7752 }
7753 
EmitOMPTaskLoopDirective(const OMPTaskLoopDirective & S)7754 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
7755   auto LPCRegion =
7756       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7757   EmitOMPTaskLoopBasedDirective(S);
7758 }
7759 
EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective & S)7760 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7761     const OMPTaskLoopSimdDirective &S) {
7762   auto LPCRegion =
7763       CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7764   OMPLexicalScope Scope(*this, S);
7765   EmitOMPTaskLoopBasedDirective(S);
7766 }
7767 
void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  // Body of the master region: emit the taskloop itself.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  // Lastprivate-conditional tracking stays disabled for this directive.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  // Pre-init statements are handled inside the taskloop emission, so the
  // lexical scope is told not to emit them here.
  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
7779 
void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  // Body of the master region: emit the taskloop (simd variant is handled
  // inside EmitOMPTaskLoopBasedDirective via the directive kind).
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  // Lastprivate-conditional tracking stays disabled for this directive.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  // Note: unlike the non-simd variant, pre-init statements are emitted by
  // this lexical scope (default EmitPreInitStmt).
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
7791 
void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  // Emitted inside the outlined parallel region: wrap the taskloop in a
  // master region so only one thread generates the tasks.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    // Pre-init statements were already emitted when the enclosing parallel
    // directive was set up; do not re-emit them here.
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  // Lastprivate-conditional tracking stays disabled for this directive.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}
7809 
void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  // Emitted inside the outlined parallel region: wrap the taskloop simd in a
  // master region so only one thread generates the tasks.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    // Pre-init statements were already emitted when the enclosing parallel
    // directive was set up; do not re-emit them here.
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  // Lastprivate-conditional tracking stays disabled for this directive.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
7827 
7828 // Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Captured expressions are emitted as for a task region.
  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
7849 
EmitOMPGenericLoopDirective(const OMPGenericLoopDirective & S)7850 void CodeGenFunction::EmitOMPGenericLoopDirective(
7851     const OMPGenericLoopDirective &S) {
7852   // Unimplemented, just inline the underlying statement for now.
7853   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7854     // Emit the loop iteration variable.
7855     const Stmt *CS =
7856         cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
7857     const auto *ForS = dyn_cast<ForStmt>(CS);
7858     if (ForS && !isa<DeclStmt>(ForS->getInit())) {
7859       OMPPrivateScope LoopScope(CGF);
7860       CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7861       (void)LoopScope.Privatize();
7862       CGF.EmitStmt(CS);
7863       LoopScope.restoreMap();
7864     } else {
7865       CGF.EmitStmt(CS);
7866     }
7867   };
7868   OMPLexicalScope Scope(*this, S, OMPD_unknown);
7869   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7870 }
7871 
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    // Scope the RAII so lastprivate-conditional tracking is re-enabled
    // before the post-emission check below.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
7890 
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize team-level reduction variables before the distribute loop.
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  // No post-update expressions here, hence the null-returning condition.
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7916 
/// Emit the region body shared by the host and device emission of
/// '#pragma omp target teams loop'.
static void
emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
                                 const OMPTargetTeamsGenericLoopDirective &S,
                                 PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize team-level reduction variables before the distribute loop.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  // No post-update expressions here, hence the null-returning condition.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7946 
7947 /// Emit combined directive 'target teams loop' as if its constituent
7948 /// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
EmitOMPTargetTeamsGenericLoopDirective(const OMPTargetTeamsGenericLoopDirective & S)7949 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
7950     const OMPTargetTeamsGenericLoopDirective &S) {
7951   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7952     emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7953   };
7954   emitCommonOMPTargetDirective(*this, S, CodeGen);
7955 }
7956 
EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetTeamsGenericLoopDirective & S)7957 void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
7958     CodeGenModule &CGM, StringRef ParentName,
7959     const OMPTargetTeamsGenericLoopDirective &S) {
7960   // Emit SPMD target parallel loop region as a standalone region.
7961   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7962     emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7963   };
7964   llvm::Function *Fn;
7965   llvm::Constant *Addr;
7966   // Emit target region as a standalone region.
7967   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7968       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7969   assert(Fn && Addr &&
7970          "Target device function emission failed for 'target teams loop'.");
7971 }
7972 
/// Emit the region body shared by the host and device emission of
/// '#pragma omp target parallel loop'.
static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Cancellation is not supported in this combined construct.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
7988 
EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule & CGM,StringRef ParentName,const OMPTargetParallelGenericLoopDirective & S)7989 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
7990     CodeGenModule &CGM, StringRef ParentName,
7991     const OMPTargetParallelGenericLoopDirective &S) {
7992   // Emit target parallel loop region as a standalone region.
7993   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7994     emitTargetParallelGenericLoopRegion(CGF, S, Action);
7995   };
7996   llvm::Function *Fn;
7997   llvm::Constant *Addr;
7998   // Emit target region as a standalone region.
7999   CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8000       S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8001   assert(Fn && Addr && "Target device function emission failed.");
8002 }
8003 
8004 /// Emit combined directive 'target parallel loop' as if its constituent
8005 /// constructs are 'target', 'parallel', and 'for'.
EmitOMPTargetParallelGenericLoopDirective(const OMPTargetParallelGenericLoopDirective & S)8006 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
8007     const OMPTargetParallelGenericLoopDirective &S) {
8008   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8009     emitTargetParallelGenericLoopRegion(CGF, S, Action);
8010   };
8011   emitCommonOMPTargetDirective(*this, S, CodeGen);
8012 }
8013 
EmitSimpleOMPExecutableDirective(const OMPExecutableDirective & D)8014 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
8015     const OMPExecutableDirective &D) {
8016   if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
8017     EmitOMPScanDirective(*SD);
8018     return;
8019   }
8020   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8021     return;
8022   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
8023     OMPPrivateScope GlobalsScope(CGF);
8024     if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
8025       // Capture global firstprivates to avoid crash.
8026       for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8027         for (const Expr *Ref : C->varlists()) {
8028           const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
8029           if (!DRE)
8030             continue;
8031           const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
8032           if (!VD || VD->hasLocalStorage())
8033             continue;
8034           if (!CGF.LocalDeclMap.count(VD)) {
8035             LValue GlobLVal = CGF.EmitLValue(Ref);
8036             GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8037           }
8038         }
8039       }
8040     }
8041     if (isOpenMPSimdDirective(D.getDirectiveKind())) {
8042       (void)GlobalsScope.Privatize();
8043       ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8044       emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
8045     } else {
8046       if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
8047         for (const Expr *E : LD->counters()) {
8048           const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
8049           if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
8050             LValue GlobLVal = CGF.EmitLValue(E);
8051             GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8052           }
8053           if (isa<OMPCapturedExprDecl>(VD)) {
8054             // Emit only those that were not explicitly referenced in clauses.
8055             if (!CGF.LocalDeclMap.count(VD))
8056               CGF.EmitVarDecl(*VD);
8057           }
8058         }
8059         for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8060           if (!C->getNumForLoops())
8061             continue;
8062           for (unsigned I = LD->getLoopsNumber(),
8063                         E = C->getLoopNumIterations().size();
8064                I < E; ++I) {
8065             if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8066                     cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
8067               // Emit only those that were not explicitly referenced in clauses.
8068               if (!CGF.LocalDeclMap.count(VD))
8069                 CGF.EmitVarDecl(*VD);
8070             }
8071           }
8072         }
8073       }
8074       (void)GlobalsScope.Privatize();
8075       CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
8076     }
8077   };
8078   if (D.getDirectiveKind() == OMPD_atomic ||
8079       D.getDirectiveKind() == OMPD_critical ||
8080       D.getDirectiveKind() == OMPD_section ||
8081       D.getDirectiveKind() == OMPD_master ||
8082       D.getDirectiveKind() == OMPD_masked ||
8083       D.getDirectiveKind() == OMPD_unroll) {
8084     EmitStmt(D.getAssociatedStmt());
8085   } else {
8086     auto LPCRegion =
8087         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
8088     OMPSimdLexicalScope Scope(*this, D);
8089     CGM.getOpenMPRuntime().emitInlinedDirective(
8090         *this,
8091         isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
8092                                                     : D.getDirectiveKind(),
8093         CodeGen);
8094   }
8095   // Check for outer lastprivate conditional update.
8096   checkForLastprivateConditionalUpdate(*this, D);
8097 }
8098