1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
10 // way to create LLVM instructions for OpenMP directives.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16 
17 #include "llvm/Analysis/MemorySSAUpdater.h"
18 #include "llvm/Frontend/OpenMP/OMPConstants.h"
19 #include "llvm/IR/DebugLoc.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/Support/Allocator.h"
22 #include "llvm/TargetParser/Triple.h"
23 #include <forward_list>
24 #include <map>
25 #include <optional>
26 
27 namespace llvm {
28 class CanonicalLoopInfo;
29 struct TargetRegionEntryInfo;
30 class OffloadEntriesInfoManager;
31 class OpenMPIRBuilder;
32 
33 /// Move the instruction after an InsertPoint to the beginning of another
34 /// BasicBlock.
35 ///
36 /// The instructions after \p IP are moved to the beginning of \p New which must
37 /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
38 /// \p New will be added such that there is no semantic change. Otherwise, the
39 /// \p IP insert block remains degenerate and it is up to the caller to insert a
40 /// terminator.
41 void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
42               bool CreateBranch);
43 
44 /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
45 /// insert location will stick to after the instruction before the insertion
46 /// point (instead of moving with the instruction the InsertPoint stores
47 /// internally).
48 void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
49 
50 /// Split a BasicBlock at an InsertPoint, even if the block is degenerate
51 /// (missing the terminator).
52 ///
53 /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
54 /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
55 /// is true, a branch to the new successor will new created such that
56 /// semantically there is no change; otherwise the block of the insertion point
57 /// remains degenerate and it is the caller's responsibility to insert a
58 /// terminator. Returns the new successor block.
59 BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
60                     llvm::Twine Name = {});
61 
62 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
63 /// degenerate (missing the terminator).  Its new insert location will stick to
64 /// after the instruction before the insertion point (instead of moving with the
65 /// instruction the InsertPoint stores internally).
66 BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
67                     llvm::Twine Name = {});
68 
69 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
70 /// degenerate (missing the terminator).  Its new insert location will stick to
71 /// after the instruction before the insertion point (instead of moving with the
72 /// instruction the InsertPoint stores internally).
73 BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
74 
75 /// Like splitBB, but reuses the current block's name for the new name.
76 BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
77                               llvm::Twine Suffix = ".split");
78 
79 /// Captures attributes that affect generating LLVM-IR using the
80 /// OpenMPIRBuilder and related classes. Note that not all attributes are
81 /// required for all classes or functions. In some use cases the configuration
82 /// is not necessary at all, because because the only functions that are called
83 /// are ones that are not dependent on the configuration.
84 class OpenMPIRBuilderConfig {
85 public:
86   /// Flag for specifying if the compilation is done for embedded device code
87   /// or host code.
88   std::optional<bool> IsTargetDevice;
89 
90   /// Flag for specifying if the compilation is done for an accelerator.
91   std::optional<bool> IsGPU;
92 
93   // Flag for specifying if offloading is mandatory.
94   std::optional<bool> OpenMPOffloadMandatory;
95 
96   /// First separator used between the initial two parts of a name.
97   std::optional<StringRef> FirstSeparator;
98   /// Separator used between all of the rest consecutive parts of s name
99   std::optional<StringRef> Separator;
100 
101   OpenMPIRBuilderConfig();
102   OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
103                         bool OpenMPOffloadMandatory,
104                         bool HasRequiresReverseOffload,
105                         bool HasRequiresUnifiedAddress,
106                         bool HasRequiresUnifiedSharedMemory,
107                         bool HasRequiresDynamicAllocators);
108 
109   // Getters functions that assert if the required values are not present.
isTargetDevice()110   bool isTargetDevice() const {
111     assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
112     return *IsTargetDevice;
113   }
114 
isGPU()115   bool isGPU() const {
116     assert(IsGPU.has_value() && "IsGPU is not set");
117     return *IsGPU;
118   }
119 
openMPOffloadMandatory()120   bool openMPOffloadMandatory() const {
121     assert(OpenMPOffloadMandatory.has_value() &&
122            "OpenMPOffloadMandatory is not set");
123     return *OpenMPOffloadMandatory;
124   }
125 
hasRequiresFlags()126   bool hasRequiresFlags() const { return RequiresFlags; }
127   bool hasRequiresReverseOffload() const;
128   bool hasRequiresUnifiedAddress() const;
129   bool hasRequiresUnifiedSharedMemory() const;
130   bool hasRequiresDynamicAllocators() const;
131 
132   /// Returns requires directive clauses as flags compatible with those expected
133   /// by libomptarget.
134   int64_t getRequiresFlags() const;
135 
136   // Returns the FirstSeparator if set, otherwise use the default separator
137   // depending on isGPU
firstSeparator()138   StringRef firstSeparator() const {
139     if (FirstSeparator.has_value())
140       return *FirstSeparator;
141     if (isGPU())
142       return "_";
143     return ".";
144   }
145 
146   // Returns the Separator if set, otherwise use the default separator depending
147   // on isGPU
separator()148   StringRef separator() const {
149     if (Separator.has_value())
150       return *Separator;
151     if (isGPU())
152       return "$";
153     return ".";
154   }
155 
setIsTargetDevice(bool Value)156   void setIsTargetDevice(bool Value) { IsTargetDevice = Value; }
setIsGPU(bool Value)157   void setIsGPU(bool Value) { IsGPU = Value; }
setOpenMPOffloadMandatory(bool Value)158   void setOpenMPOffloadMandatory(bool Value) { OpenMPOffloadMandatory = Value; }
setFirstSeparator(StringRef FS)159   void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
setSeparator(StringRef S)160   void setSeparator(StringRef S) { Separator = S; }
161 
162   void setHasRequiresReverseOffload(bool Value);
163   void setHasRequiresUnifiedAddress(bool Value);
164   void setHasRequiresUnifiedSharedMemory(bool Value);
165   void setHasRequiresDynamicAllocators(bool Value);
166 
167 private:
168   /// Flags for specifying which requires directive clauses are present.
169   int64_t RequiresFlags;
170 };
171 
172 /// Data structure to contain the information needed to uniquely identify
173 /// a target entry.
174 struct TargetRegionEntryInfo {
175   std::string ParentName;
176   unsigned DeviceID;
177   unsigned FileID;
178   unsigned Line;
179   unsigned Count;
180 
TargetRegionEntryInfoTargetRegionEntryInfo181   TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {}
182   TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
183                         unsigned FileID, unsigned Line, unsigned Count = 0)
ParentNameTargetRegionEntryInfo184       : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
185         Count(Count) {}
186 
187   static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
188                                          StringRef ParentName,
189                                          unsigned DeviceID, unsigned FileID,
190                                          unsigned Line, unsigned Count);
191 
192   bool operator<(const TargetRegionEntryInfo RHS) const {
193     return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
194            std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
195                            RHS.Count);
196   }
197 };
198 
199 /// Class that manages information about offload code regions and data
200 class OffloadEntriesInfoManager {
201   /// Number of entries registered so far.
202   OpenMPIRBuilder *OMPBuilder;
203   unsigned OffloadingEntriesNum = 0;
204 
205 public:
206   /// Base class of the entries info.
207   class OffloadEntryInfo {
208   public:
209     /// Kind of a given entry.
210     enum OffloadingEntryInfoKinds : unsigned {
211       /// Entry is a target region.
212       OffloadingEntryInfoTargetRegion = 0,
213       /// Entry is a declare target variable.
214       OffloadingEntryInfoDeviceGlobalVar = 1,
215       /// Invalid entry info.
216       OffloadingEntryInfoInvalid = ~0u
217     };
218 
219   protected:
220     OffloadEntryInfo() = delete;
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)221     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
OffloadEntryInfo(OffloadingEntryInfoKinds Kind,unsigned Order,uint32_t Flags)222     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
223                               uint32_t Flags)
224         : Flags(Flags), Order(Order), Kind(Kind) {}
225     ~OffloadEntryInfo() = default;
226 
227   public:
isValid()228     bool isValid() const { return Order != ~0u; }
getOrder()229     unsigned getOrder() const { return Order; }
getKind()230     OffloadingEntryInfoKinds getKind() const { return Kind; }
getFlags()231     uint32_t getFlags() const { return Flags; }
setFlags(uint32_t NewFlags)232     void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
getAddress()233     Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
setAddress(Constant * V)234     void setAddress(Constant *V) {
235       assert(!Addr.pointsToAliveValue() && "Address has been set before!");
236       Addr = V;
237     }
classof(const OffloadEntryInfo * Info)238     static bool classof(const OffloadEntryInfo *Info) { return true; }
239 
240   private:
241     /// Address of the entity that has to be mapped for offloading.
242     WeakTrackingVH Addr;
243 
244     /// Flags associated with the device global.
245     uint32_t Flags = 0u;
246 
247     /// Order this entry was emitted.
248     unsigned Order = ~0u;
249 
250     OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
251   };
252 
253   /// Return true if a there are no entries defined.
254   bool empty() const;
255   /// Return number of entries defined so far.
size()256   unsigned size() const { return OffloadingEntriesNum; }
257 
OffloadEntriesInfoManager(OpenMPIRBuilder * builder)258   OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
259 
260   //
261   // Target region entries related.
262   //
263 
264   /// Kind of the target registry entry.
265   enum OMPTargetRegionEntryKind : uint32_t {
266     /// Mark the entry as target region.
267     OMPTargetRegionEntryTargetRegion = 0x0,
268   };
269 
270   /// Target region entries info.
271   class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
272     /// Address that can be used as the ID of the entry.
273     Constant *ID = nullptr;
274 
275   public:
OffloadEntryInfoTargetRegion()276     OffloadEntryInfoTargetRegion()
277         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
OffloadEntryInfoTargetRegion(unsigned Order,Constant * Addr,Constant * ID,OMPTargetRegionEntryKind Flags)278     explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
279                                           Constant *ID,
280                                           OMPTargetRegionEntryKind Flags)
281         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
282           ID(ID) {
283       setAddress(Addr);
284     }
285 
getID()286     Constant *getID() const { return ID; }
setID(Constant * V)287     void setID(Constant *V) {
288       assert(!ID && "ID has been set before!");
289       ID = V;
290     }
classof(const OffloadEntryInfo * Info)291     static bool classof(const OffloadEntryInfo *Info) {
292       return Info->getKind() == OffloadingEntryInfoTargetRegion;
293     }
294   };
295 
296   /// Initialize target region entry.
297   /// This is ONLY needed for DEVICE compilation.
298   void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
299                                        unsigned Order);
300   /// Register target region entry.
301   void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
302                                      Constant *Addr, Constant *ID,
303                                      OMPTargetRegionEntryKind Flags);
304   /// Return true if a target region entry with the provided information
305   /// exists.
306   bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
307                                 bool IgnoreAddressId = false) const;
308 
309   // Return the Name based on \a EntryInfo using the next available Count.
310   void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
311                                   const TargetRegionEntryInfo &EntryInfo);
312 
313   /// brief Applies action \a Action on all registered entries.
314   typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
315                             const OffloadEntryInfoTargetRegion &)>
316       OffloadTargetRegionEntryInfoActTy;
317   void
318   actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
319 
320   //
321   // Device global variable entries related.
322   //
323 
324   /// Kind of the global variable entry..
325   enum OMPTargetGlobalVarEntryKind : uint32_t {
326     /// Mark the entry as a to declare target.
327     OMPTargetGlobalVarEntryTo = 0x0,
328     /// Mark the entry as a to declare target link.
329     OMPTargetGlobalVarEntryLink = 0x1,
330     /// Mark the entry as a declare target enter.
331     OMPTargetGlobalVarEntryEnter = 0x2,
332     /// Mark the entry as having no declare target entry kind.
333     OMPTargetGlobalVarEntryNone = 0x3,
334     /// Mark the entry as a declare target indirect global.
335     OMPTargetGlobalVarEntryIndirect = 0x8,
336   };
337 
338   /// Kind of device clause for declare target variables
339   /// and functions
340   /// NOTE: Currently not used as a part of a variable entry
341   /// used for Flang and Clang to interface with the variable
342   /// related registration functions
343   enum OMPTargetDeviceClauseKind : uint32_t {
344     /// The target is marked for all devices
345     OMPTargetDeviceClauseAny = 0x0,
346     /// The target is marked for non-host devices
347     OMPTargetDeviceClauseNoHost = 0x1,
348     /// The target is marked for host devices
349     OMPTargetDeviceClauseHost = 0x2,
350     /// The target is marked as having no clause
351     OMPTargetDeviceClauseNone = 0x3
352   };
353 
354   /// Device global variable entries info.
355   class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
356     /// Type of the global variable.
357     int64_t VarSize;
358     GlobalValue::LinkageTypes Linkage;
359     const std::string VarName;
360 
361   public:
OffloadEntryInfoDeviceGlobalVar()362     OffloadEntryInfoDeviceGlobalVar()
363         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
OffloadEntryInfoDeviceGlobalVar(unsigned Order,OMPTargetGlobalVarEntryKind Flags)364     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
365                                              OMPTargetGlobalVarEntryKind Flags)
366         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
OffloadEntryInfoDeviceGlobalVar(unsigned Order,Constant * Addr,int64_t VarSize,OMPTargetGlobalVarEntryKind Flags,GlobalValue::LinkageTypes Linkage,const std::string & VarName)367     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
368                                              int64_t VarSize,
369                                              OMPTargetGlobalVarEntryKind Flags,
370                                              GlobalValue::LinkageTypes Linkage,
371                                              const std::string &VarName)
372         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
373           VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
374       setAddress(Addr);
375     }
376 
getVarSize()377     int64_t getVarSize() const { return VarSize; }
getVarName()378     StringRef getVarName() const { return VarName; }
setVarSize(int64_t Size)379     void setVarSize(int64_t Size) { VarSize = Size; }
getLinkage()380     GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
setLinkage(GlobalValue::LinkageTypes LT)381     void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
classof(const OffloadEntryInfo * Info)382     static bool classof(const OffloadEntryInfo *Info) {
383       return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
384     }
385   };
386 
387   /// Initialize device global variable entry.
388   /// This is ONLY used for DEVICE compilation.
389   void initializeDeviceGlobalVarEntryInfo(StringRef Name,
390                                           OMPTargetGlobalVarEntryKind Flags,
391                                           unsigned Order);
392 
393   /// Register device global variable entry.
394   void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
395                                         int64_t VarSize,
396                                         OMPTargetGlobalVarEntryKind Flags,
397                                         GlobalValue::LinkageTypes Linkage);
398   /// Checks if the variable with the given name has been registered already.
hasDeviceGlobalVarEntryInfo(StringRef VarName)399   bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
400     return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
401   }
402   /// Applies action \a Action on all registered entries.
403   typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
404       OffloadDeviceGlobalVarEntryInfoActTy;
405   void actOnDeviceGlobalVarEntriesInfo(
406       const OffloadDeviceGlobalVarEntryInfoActTy &Action);
407 
408 private:
409   /// Return the count of entries at a particular source location.
410   unsigned
411   getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
412 
413   /// Update the count of entries at a particular source location.
414   void
415   incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
416 
417   static TargetRegionEntryInfo
getTargetRegionEntryCountKey(const TargetRegionEntryInfo & EntryInfo)418   getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
419     return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
420                                  EntryInfo.FileID, EntryInfo.Line, 0);
421   }
422 
423   // Count of entries at a location.
424   std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
425 
426   // Storage for target region entries kind.
427   typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
428       OffloadEntriesTargetRegionTy;
429   OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
430   /// Storage for device global variable entries kind. The storage is to be
431   /// indexed by mangled name.
432   typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
433       OffloadEntriesDeviceGlobalVarTy;
434   OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
435 };
436 
437 /// An interface to create LLVM-IR for OpenMP directives.
438 ///
439 /// Each OpenMP directive has a corresponding public generator method.
440 class OpenMPIRBuilder {
441 public:
442   /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
443   /// not have an effect on \p M (see initialize)
OpenMPIRBuilder(Module & M)444   OpenMPIRBuilder(Module &M)
445       : M(M), Builder(M.getContext()), OffloadInfoManager(this),
446         T(Triple(M.getTargetTriple())) {}
447   ~OpenMPIRBuilder();
448 
449   /// Initialize the internal state, this will put structures types and
450   /// potentially other helpers into the underlying module. Must be called
451   /// before any other method and only once! This internal state includes types
452   /// used in the OpenMPIRBuilder generated from OMPKinds.def.
453   void initialize();
454 
setConfig(OpenMPIRBuilderConfig C)455   void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
456 
457   /// Finalize the underlying module, e.g., by outlining regions.
458   /// \param Fn                    The function to be finalized. If not used,
459   ///                              all functions are finalized.
460   void finalize(Function *Fn = nullptr);
461 
462   /// Add attributes known for \p FnID to \p Fn.
463   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
464 
465   /// Type used throughout for insertion points.
466   using InsertPointTy = IRBuilder<>::InsertPoint;
467 
468   /// Create a name using the platform specific separators.
469   /// \param Parts parts of the final name that needs separation
470   /// The created name has a first separator between the first and second part
471   /// and a second separator between all other parts.
472   /// E.g. with FirstSeparator "$" and Separator "." and
473   /// parts: "p1", "p2", "p3", "p4"
474   /// The resulting name is "p1$p2.p3.p4"
475   /// The separators are retrieved from the OpenMPIRBuilderConfig.
476   std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
477 
478   /// Callback type for variable finalization (think destructors).
479   ///
480   /// \param CodeGenIP is the insertion point at which the finalization code
481   ///                  should be placed.
482   ///
483   /// A finalize callback knows about all objects that need finalization, e.g.
484   /// destruction, when the scope of the currently generated construct is left
485   /// at the time, and location, the callback is invoked.
486   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
487 
488   struct FinalizationInfo {
489     /// The finalization callback provided by the last in-flight invocation of
490     /// createXXXX for the directive of kind DK.
491     FinalizeCallbackTy FiniCB;
492 
493     /// The directive kind of the innermost directive that has an associated
494     /// region which might require finalization when it is left.
495     omp::Directive DK;
496 
497     /// Flag to indicate if the directive is cancellable.
498     bool IsCancellable;
499   };
500 
501   /// Push a finalization callback on the finalization stack.
502   ///
503   /// NOTE: Temporary solution until Clang CG is gone.
pushFinalizationCB(const FinalizationInfo & FI)504   void pushFinalizationCB(const FinalizationInfo &FI) {
505     FinalizationStack.push_back(FI);
506   }
507 
508   /// Pop the last finalization callback from the finalization stack.
509   ///
510   /// NOTE: Temporary solution until Clang CG is gone.
popFinalizationCB()511   void popFinalizationCB() { FinalizationStack.pop_back(); }
512 
513   /// Callback type for body (=inner region) code generation
514   ///
515   /// The callback takes code locations as arguments, each describing a
516   /// location where additional instructions can be inserted.
517   ///
518   /// The CodeGenIP may be in the middle of a basic block or point to the end of
519   /// it. The basic block may have a terminator or be degenerate. The callback
520   /// function may just insert instructions at that position, but also split the
521   /// block (without the Before argument of BasicBlock::splitBasicBlock such
522   /// that the identity of the split predecessor block is preserved) and insert
523   /// additional control flow, including branches that do not lead back to what
524   /// follows the CodeGenIP. Note that since the callback is allowed to split
525   /// the block, callers must assume that InsertPoints to positions in the
526   /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
527   /// such InsertPoints need to be preserved, it can split the block itself
528   /// before calling the callback.
529   ///
530   /// AllocaIP and CodeGenIP must not point to the same position.
531   ///
532   /// \param AllocaIP is the insertion point at which new alloca instructions
533   ///                 should be placed. The BasicBlock it is pointing to must
534   ///                 not be split.
535   /// \param CodeGenIP is the insertion point at which the body code should be
536   ///                  placed.
537   using BodyGenCallbackTy =
538       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
539 
540   // This is created primarily for sections construct as llvm::function_ref
541   // (BodyGenCallbackTy) is not storable (as described in the comments of
542   // function_ref class - function_ref contains non-ownable reference
543   // to the callable).
544   using StorableBodyGenCallbackTy =
545       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
546 
547   /// Callback type for loop body code generation.
548   ///
549   /// \param CodeGenIP is the insertion point where the loop's body code must be
550   ///                  placed. This will be a dedicated BasicBlock with a
551   ///                  conditional branch from the loop condition check and
552   ///                  terminated with an unconditional branch to the loop
553   ///                  latch.
554   /// \param IndVar    is the induction variable usable at the insertion point.
555   using LoopBodyGenCallbackTy =
556       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
557 
558   /// Callback type for variable privatization (think copy & default
559   /// constructor).
560   ///
561   /// \param AllocaIP is the insertion point at which new alloca instructions
562   ///                 should be placed.
563   /// \param CodeGenIP is the insertion point at which the privatization code
564   ///                  should be placed.
565   /// \param Original The value being copied/created, should not be used in the
566   ///                 generated IR.
567   /// \param Inner The equivalent of \p Original that should be used in the
568   ///              generated IR; this is equal to \p Original if the value is
569   ///              a pointer and can thus be passed directly, otherwise it is
570   ///              an equivalent but different value.
571   /// \param ReplVal The replacement value, thus a copy or newly created version
572   ///                of \p Inner.
573   ///
574   /// \returns The new insertion point where code generation continues and
575   ///          \p ReplVal the replacement value.
576   using PrivatizeCallbackTy = function_ref<InsertPointTy(
577       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
578       Value &Inner, Value *&ReplVal)>;
579 
580   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
581   /// (filename, line, column, ...).
582   struct LocationDescription {
LocationDescriptionLocationDescription583     LocationDescription(const IRBuilderBase &IRB)
584         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
LocationDescriptionLocationDescription585     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
LocationDescriptionLocationDescription586     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
587         : IP(IP), DL(DL) {}
588     InsertPointTy IP;
589     DebugLoc DL;
590   };
591 
592   /// Emitter methods for OpenMP directives.
593   ///
594   ///{
595 
596   /// Generator for '#omp barrier'
597   ///
598   /// \param Loc The location where the barrier directive was encountered.
599   /// \param DK The kind of directive that caused the barrier.
600   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
601   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
602   ///                        should be checked and acted upon.
603   ///
604   /// \returns The insertion point after the barrier.
605   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
606                               bool ForceSimpleCall = false,
607                               bool CheckCancelFlag = true);
608 
609   /// Generator for '#omp cancel'
610   ///
611   /// \param Loc The location where the directive was encountered.
612   /// \param IfCondition The evaluated 'if' clause expression, if any.
613   /// \param CanceledDirective The kind of directive that is canceled.
614   ///
615   /// \returns The insertion point after the barrier.
616   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
617                              omp::Directive CanceledDirective);
618 
619   /// Generator for '#omp parallel'
620   ///
621   /// \param Loc The insert and source location description.
622   /// \param AllocaIP The insertion points to be used for alloca instructions.
623   /// \param BodyGenCB Callback that will generate the region code.
624   /// \param PrivCB Callback to copy a given variable (think copy constructor).
625   /// \param FiniCB Callback to finalize variable copies.
626   /// \param IfCondition The evaluated 'if' clause expression, if any.
627   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
628   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
629   /// \param IsCancellable Flag to indicate a cancellable parallel region.
630   ///
631   /// \returns The insertion position *after* the parallel.
632   IRBuilder<>::InsertPoint
633   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
634                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
635                  FinalizeCallbackTy FiniCB, Value *IfCondition,
636                  Value *NumThreads, omp::ProcBindKind ProcBind,
637                  bool IsCancellable);
638 
639   /// Generator for the control flow structure of an OpenMP canonical loop.
640   ///
641   /// This generator operates on the logical iteration space of the loop, i.e.
642   /// the caller only has to provide the trip count of the loop as defined by
643   /// the base language semantics. The trip count is interpreted as an unsigned
644   /// integer. The induction variable passed to \p BodyGenCB will be of the same
645   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
646   /// convert the logical iteration variable to the loop counter variable in the
647   /// loop body.
648   ///
649   /// \param Loc       The insert and source location description. The insert
650   ///                  location can be between two instructions or the end of a
651   ///                  degenerate block (e.g. a BB under construction).
652   /// \param BodyGenCB Callback that will generate the loop body code.
653   /// \param TripCount Number of iterations the loop body is executed.
654   /// \param Name      Base name used to derive BB and instruction names.
655   ///
656   /// \returns An object representing the created control flow structure which
657   ///          can be used for loop-associated directives.
658   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
659                                          LoopBodyGenCallbackTy BodyGenCB,
660                                          Value *TripCount,
661                                          const Twine &Name = "loop");
662 
663   /// Generator for the control flow structure of an OpenMP canonical loop.
664   ///
665   /// Instead of a logical iteration space, this allows specifying user-defined
666   /// loop counter values using increment, upper- and lower bounds. To
667   /// disambiguate the terminology when counting downwards, instead of lower
668   /// bounds we use \p Start for the loop counter value in the first body
669   /// iteration.
670   ///
671   /// Consider the following limitations:
672   ///
673   ///  * A loop counter space over all integer values of its bit-width cannot be
674   ///    represented. E.g. using uint8_t, the loop trip count of 256 cannot be
675   ///    stored in an 8-bit integer:
676   ///
677   ///      DO I = 0, 255, 1
678   ///
679   ///  * Unsigned wrapping is only supported when wrapping only "once"; e.g.
680   ///    effectively counting downwards:
681   ///
682   ///      for (uint8_t i = 100u; i > 0; i += 127u)
683   ///
684   ///
685   /// TODO: May need to add additional parameters to represent:
686   ///
687   ///  * Allow representing downcounting with unsigned integers.
688   ///
689   ///  * Sign of the step and the comparison operator might disagree:
690   ///
691   ///      for (int i = 0; i < 42; i -= 1u)
692   ///
693   ///
694   /// \param Loc       The insert and source location description.
695   /// \param BodyGenCB Callback that will generate the loop body code.
696   /// \param Start     Value of the loop counter for the first iteration.
697   /// \param Stop      Loop counter values past this will stop the loop.
698   /// \param Step      Loop counter increment after each iteration; negative
699   ///                  means counting down.
700   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
701   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
702   ///                      counter.
703   /// \param ComputeIP Insertion point for instructions computing the trip
704   ///                  count. Can be used to ensure the trip count is available
705   ///                  at the outermost loop of a loop nest. If not set,
706   ///                  defaults to the preheader of the generated loop.
707   /// \param Name      Base name used to derive BB and instruction names.
708   ///
709   /// \returns An object representing the created control flow structure which
710   ///          can be used for loop-associated directives.
711   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
712                                          LoopBodyGenCallbackTy BodyGenCB,
713                                          Value *Start, Value *Stop, Value *Step,
714                                          bool IsSigned, bool InclusiveStop,
715                                          InsertPointTy ComputeIP = {},
716                                          const Twine &Name = "loop");
717 
718   /// Collapse a loop nest into a single loop.
719   ///
720   /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
721   /// that has the same number of innermost loop iterations as the original loop
722   /// nest. The induction variables of the input loops are derived from the
723   /// collapsed loop's induction variable. This is intended to be used to
724   /// implement OpenMP's collapse clause. Before applying a directive,
725   /// collapseLoops normalizes a loop nest to contain only a single loop and the
726   /// directive's implementation does not need to handle multiple loops itself.
727   /// This does not remove the need for directives to handle loop nests in all
728   /// cases, such as the ordered(<n>) clause or the simd schedule-clause
729   /// modifier of the worksharing-loop directive.
730   ///
731   /// Example:
732   /// \code
733   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
734   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
735   ///       body(i, j);
736   /// \endcode
737   ///
738   /// After collapsing with Loops={i,j}, the loop is changed to
739   /// \code
740   ///   for (int ij = 0; ij < 63; ++ij) {
741   ///     int i = ij / 9;
742   ///     int j = ij % 9;
743   ///     body(i, j);
744   ///   }
745   /// \endcode
746   ///
747   /// In the current implementation, the following limitations apply:
748   ///
749   ///  * All input loops have an induction variable of the same type.
750   ///
751   ///  * The collapsed loop will have the same trip count integer type as the
752   ///    input loops. Therefore it is possible that the collapsed loop cannot
753   ///    represent all iterations of the input loops. For instance, assuming a
754   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
755   ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
756   ///    which cannot be represented in a 32-bit integer. Behavior is undefined
757   ///    in this case.
758   ///
759   ///  * The trip counts of every input loop must be available at \p ComputeIP.
760   ///    Non-rectangular loops are not yet supported.
761   ///
762   ///  * At each nest level, code between a surrounding loop and its nested loop
763   ///    is hoisted into the loop body, and such code will be executed more
764   ///    often than before collapsing (or not at all if any inner loop has a
765   ///    trip count of 0). This is permitted by the OpenMP specification.
766   ///
767   /// \param DL        Debug location for instructions added for collapsing,
768   ///                  such as instructions to compute/derive the input loop's
769   ///                  induction variables.
770   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
771   ///                  from outermost-to-innermost and every control flow of a
772   ///                  loop's body must pass through its directly nested loop.
773   /// \param ComputeIP Where to insert additional instructions that compute the
774   ///                  collapsed trip count. If not set, defaults to before the
775   ///                  generated loop.
776   ///
777   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
778   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
779                                    ArrayRef<CanonicalLoopInfo *> Loops,
780                                    InsertPointTy ComputeIP);
781 
782   /// Get the default SIMD alignment value for the given target.
783   ///
784   /// \param TargetTriple   Target triple.
785   /// \param Features       StringMap which describes extra CPU features.
786   static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
787                                             const StringMap<bool> &Features);
788 
789   /// Retrieve (or create if non-existent) the address of a declare
790   /// target variable, used in conjunction with registerTargetGlobalVariable
791   /// to create declare target global variables.
792   ///
793   /// \param CaptureClause - enumerator corresponding to the OpenMP capture
794   /// clause used in conjunction with the variable being registered (link,
795   /// to, enter).
796   /// \param DeviceClause - enumerator corresponding to the OpenMP device_type
797   /// clause used in conjunction with the variable being registered (nohost,
798   /// host, any).
799   /// \param IsDeclaration - boolean stating if the variable being registered
800   /// is a declaration-only and not a definition.
801   /// \param IsExternallyVisible - boolean stating if the variable is externally
802   /// visible.
803   /// \param EntryInfo - Unique entry information for the value generated
804   /// using getTargetEntryUniqueInfo, used to name generated pointer references
805   /// to the declare target variable.
806   /// \param MangledName - the mangled name of the variable being registered.
807   /// \param GeneratedRefs - references generated by invocations of
808   /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
809   /// these are required by Clang for book keeping.
810   /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled.
811   /// \param TargetTriple - The OpenMP device target triple(s) we are compiling
812   /// for.
813   /// \param LlvmPtrTy - The type of the variable we are generating or
814   /// retrieving an address for.
815   /// \param GlobalInitializer - a lambda function which creates a constant
816   /// used for initializing a pointer reference to the variable in certain
817   /// cases. If a nullptr is passed, it will default to utilising the original
818   /// variable to initialize the pointer reference.
819   /// \param VariableLinkage - a lambda function which returns the variable's
820   /// linkage type; if unspecified and a nullptr is given, it will instead
821   /// utilise the linkage stored on the existing global variable in the
822   /// LLVMModule.
823   Constant *getAddrOfDeclareTargetVar(
824       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
825       OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
826       bool IsDeclaration, bool IsExternallyVisible,
827       TargetRegionEntryInfo EntryInfo, StringRef MangledName,
828       std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
829       std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
830       std::function<Constant *()> GlobalInitializer,
831       std::function<GlobalValue::LinkageTypes()> VariableLinkage);
832 
833   /// Registers a target variable for device or host.
834   ///
835   /// \param CaptureClause - enumerator corresponding to the OpenMP capture
836   /// clause used in conjunction with the variable being registered (link,
837   /// to, enter).
838   /// \param DeviceClause - enumerator corresponding to the OpenMP device_type
839   /// clause used in conjunction with the variable being registered (nohost,
840   /// host, any).
841   /// \param IsDeclaration - boolean stating if the variable being registered
842   /// is a declaration-only and not a definition.
843   /// \param IsExternallyVisible - boolean stating if the variable is externally
844   /// visible.
845   /// \param EntryInfo - Unique entry information for the value generated
846   /// using getTargetEntryUniqueInfo, used to name generated pointer references
847   /// to the declare target variable.
848   /// \param MangledName - the mangled name of the variable being registered.
849   /// \param GeneratedRefs - references generated by invocations of
850   /// registerTargetGlobalVariable; these are required by Clang for book
851   /// keeping.
852   /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled.
853   /// \param TargetTriple - The OpenMP device target triple(s) we are compiling
854   /// for.
855   /// \param GlobalInitializer - a lambda function which creates a constant
856   /// used for initializing a pointer reference to the variable in certain
857   /// cases. If a nullptr is passed, it will default to utilising the original
858   /// variable to initialize the pointer reference.
859   /// \param VariableLinkage - a lambda function which returns the variable's
860   /// linkage type; if unspecified and a nullptr is given, it will instead
861   /// utilise the linkage stored on the existing global variable in the
862   /// LLVMModule.
863   /// \param LlvmPtrTy - The type of the variable we are generating or
864   /// retrieving an address for.
865   /// \param Addr - the original llvm value (addr) of the variable to be
866   /// registered.
867   void registerTargetGlobalVariable(
868       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
869       OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
870       bool IsDeclaration, bool IsExternallyVisible,
871       TargetRegionEntryInfo EntryInfo, StringRef MangledName,
872       std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
873       std::vector<Triple> TargetTriple,
874       std::function<Constant *()> GlobalInitializer,
875       std::function<GlobalValue::LinkageTypes()> VariableLinkage,
876       Type *LlvmPtrTy, Constant *Addr);
877 
878   /// Get the offset of the OMP_MAP_MEMBER_OF field within the mapping flags.
879   unsigned getFlagMemberOffset();
880 
881   /// Get the OMP_MAP_MEMBER_OF flag with extra bits reserved based on
882   /// the position given.
883   /// \param Position - A value indicating the position of the parent
884   /// of the member in the kernel argument structure, often retrieved
885   /// by the parent's position in the combined information vectors used
886   /// to generate the structure itself. Multiple children (members of)
887   /// with the same parent will use the same returned member flag.
888   omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position);
889 
890   /// Given an initial flag set, this function modifies it to contain
891   /// the passed in MemberOfFlag generated from the getMemberOfFlag
892   /// function. The results are dependent on the existing flag bits
893   /// set in the original flag set.
894   /// \param Flags - The original set of flags to be modified with the
895   /// passed in MemberOfFlag.
896   /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted
897   /// slightly based on the getMemberOfFlag which adjusts the flag bits
898   /// based on the member's position in its parent.
899   void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags,
900                               omp::OpenMPOffloadMappingFlags MemberOfFlag);
901 
902 private:
903   /// Modifies the canonical loop to be a statically-scheduled workshare loop
904   /// which is executed on the device.
905   ///
906   /// This takes a \p CLI representing a canonical loop, such as the one
907   /// created by \p createCanonicalLoop and emits additional instructions to
908   /// turn it into a workshare loop. In particular, it emits a call in the
909   /// preheader to the OpenMP device runtime library function which handles
910   /// worksharing of loop body iterations.
911   ///
912   /// \param DL       Debug location for instructions added for the
913   ///                 workshare-loop construct itself.
914   /// \param CLI      A descriptor of the canonical loop to workshare.
915   /// \param AllocaIP An insertion point for Alloca instructions usable in the
916   ///                 preheader of the loop.
917   /// \param LoopType Information about type of loop worksharing.
918   ///                 It corresponds to type of loop workshare OpenMP pragma.
919   ///
920   /// \returns Point where to insert code after the workshare construct.
921   InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
922                                          InsertPointTy AllocaIP,
923                                          omp::WorksharingLoopType LoopType);
924 
925   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
926   ///
927   /// This takes a \p CLI representing a canonical loop, such as the one
928   /// created by \p createCanonicalLoop and emits additional instructions to
929   /// turn it into a workshare loop. In particular, it calls to an OpenMP
930   /// runtime function in the preheader to obtain the loop bounds to be used in
931   /// the current thread, updates the relevant instructions in the canonical
932   /// loop and calls to an OpenMP runtime finalization function after the loop.
933   ///
934   /// \param DL       Debug location for instructions added for the
935   ///                 workshare-loop construct itself.
936   /// \param CLI      A descriptor of the canonical loop to workshare.
937   /// \param AllocaIP An insertion point for Alloca instructions usable in the
938   ///                 preheader of the loop.
939   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
940   ///                     the loop.
941   ///
942   /// \returns Point where to insert code after the workshare construct.
943   InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
944                                          InsertPointTy AllocaIP,
945                                          bool NeedsBarrier);
946 
947   /// Modifies the canonical loop to be a statically-scheduled workshare loop
948   /// with a user-specified chunk size.
949   ///
950   /// \param DL           Debug location for instructions added for the
951   ///                     workshare-loop construct itself.
952   /// \param CLI          A descriptor of the canonical loop to workshare.
953   /// \param AllocaIP     An insertion point for Alloca instructions usable in
954   ///                     the preheader of the loop.
955   /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
956   ///                     loop.
957   /// \param ChunkSize    The user-specified chunk size.
958   ///
959   /// \returns Point where to insert code after the workshare construct.
960   InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
961                                                 CanonicalLoopInfo *CLI,
962                                                 InsertPointTy AllocaIP,
963                                                 bool NeedsBarrier,
964                                                 Value *ChunkSize);
965 
966   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
967   ///
968   /// This takes a \p CLI representing a canonical loop, such as the one
969   /// created by \p createCanonicalLoop and emits additional instructions to
970   /// turn it into a workshare loop. In particular, it calls to an OpenMP
971   /// runtime function in the preheader to obtain, and then in each iteration
972   /// to update the loop counter.
973   ///
974   /// \param DL       Debug location for instructions added for the
975   ///                 workshare-loop construct itself.
976   /// \param CLI      A descriptor of the canonical loop to workshare.
977   /// \param AllocaIP An insertion point for Alloca instructions usable in the
978   ///                 preheader of the loop.
979   /// \param SchedType Type of scheduling to be passed to the init function.
980   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
981   ///                     the loop.
982   /// \param Chunk    The size of loop chunk considered as a unit when
983   ///                 scheduling. If nullptr, defaults to 1.
984   ///
985   /// \returns Point where to insert code after the workshare construct.
986   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
987                                           InsertPointTy AllocaIP,
988                                           omp::OMPScheduleType SchedType,
989                                           bool NeedsBarrier,
990                                           Value *Chunk = nullptr);
991 
992   /// Create an alternative version of the loop to support the if clause.
993   ///
994   /// An OpenMP if clause can require generating a second loop. This loop
995   /// will be executed when the if clause condition is not met. createIfVersion
996   /// adds a branch instruction to the copied loop if \p IfCond is not met.
997   ///
998   /// \param Loop       Original loop which should be versioned.
999   /// \param IfCond     Value which corresponds to the if clause condition.
1000   /// \param VMap       Value to value map to define relation between
1001   ///                   original and copied loop values and loop blocks.
1002   /// \param NamePrefix Optional name prefix for if.then if.else blocks.
1003   void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
1004                        ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
1005 
1006 public:
1007   /// Modifies the canonical loop to be a workshare loop.
1008   ///
1009   /// This takes a \p CLI representing a canonical loop, such as the one
1010   /// created by \p createCanonicalLoop and emits additional instructions to
1011   /// turn it into a workshare loop. In particular, it calls to an OpenMP
1012   /// runtime function in the preheader to obtain the loop bounds to be used in
1013   /// the current thread, updates the relevant instructions in the canonical
1014   /// loop and calls to an OpenMP runtime finalization function after the loop.
1015   ///
1016   /// The concrete transformation is done by applyStaticWorkshareLoop,
1017   /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
1018   /// on the value of \p SchedKind and \p ChunkSize.
1019   ///
1020   /// \param DL       Debug location for instructions added for the
1021   ///                 workshare-loop construct itself.
1022   /// \param CLI      A descriptor of the canonical loop to workshare.
1023   /// \param AllocaIP An insertion point for Alloca instructions usable in the
1024   ///                 preheader of the loop.
1025   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
1026   ///                     the loop.
1027   /// \param SchedKind Scheduling algorithm to use.
1028   /// \param ChunkSize The chunk size for the inner loop.
1029   /// \param HasSimdModifier Whether the simd modifier is present in the
1030   ///                        schedule clause.
1031   /// \param HasMonotonicModifier Whether the monotonic modifier is present in
1032   ///                             the schedule clause.
1033   /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
1034   ///                                present in the schedule clause.
1035   /// \param HasOrderedClause Whether the (parameterless) ordered clause is
1036   ///                         present.
1037   /// \param LoopType Information about type of loop worksharing.
1038   ///                 It corresponds to type of loop workshare OpenMP pragma.
1039   ///
1040   /// \returns Point where to insert code after the workshare construct.
1041   InsertPointTy applyWorkshareLoop(
1042       DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1043       bool NeedsBarrier,
1044       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
1045       Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1046       bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1047       bool HasOrderedClause = false,
1048       omp::WorksharingLoopType LoopType =
1049           omp::WorksharingLoopType::ForStaticLoop);
1050 
1051   /// Tile a loop nest.
1052   ///
1053   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1054   /// \p Loops must be perfectly nested, from outermost to innermost loop
1055   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1056   /// of every loop and every tile size must be usable in the outermost
1057   /// loop's preheader. This implies that the loop nest is rectangular.
1058   ///
1059   /// Example:
1060   /// \code
1061   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
1062   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
1063   ///         body(i, j);
1064   /// \endcode
1065   ///
1066   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1067   /// \code
1068   ///   for (int i1 = 0; i1 < 3; ++i1)
1069   ///     for (int j1 = 0; j1 < 2; ++j1)
1070   ///       for (int i2 = 0; i2 < 5; ++i2)
1071   ///         for (int j2 = 0; j2 < 7; ++j2)
1072   ///           body(i1*5+i2, j1*7+j2);
1073   /// \endcode
1074   ///
1075   /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
1076   /// are referred to as the floor loops, and the loops i2 and j2 as the tile
1077   /// loops. Tiling also handles non-constant trip counts, non-constant tile
1078   /// sizes and trip counts that are not multiples of the tile size. In the
1079   /// latter case the tile loop of the last floor-loop iteration will have fewer
1080   /// iterations than specified as its tile size.
1081   ///
1082   ///
1083   /// \param DL        Debug location for instructions added by tiling, for
1084   ///                  instance the floor- and tile trip count computation.
1085   /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
1086   ///                  invalidated by this method, i.e. should not be used after
1087   ///                  tiling.
1088   /// \param TileSizes For each loop in \p Loops, the tile size for that
1089   ///                  dimension.
1090   ///
1091   /// \returns A list of generated loops. Contains twice as many loops as the
1092   ///          input loop nest; the first half are the floor loops and the
1093   ///          second half are the tile loops.
1094   std::vector<CanonicalLoopInfo *>
1095   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1096             ArrayRef<Value *> TileSizes);
1097 
1098   /// Fully unroll a loop.
1099   ///
1100   /// Instead of unrolling the loop immediately (and duplicating its body
1101   /// instructions), unrolling is deferred to LLVM's LoopUnrollPass by adding
1102   /// loop metadata.
1103   ///
1104   /// \param DL   Debug location for instructions added by unrolling.
1105   /// \param Loop The loop to unroll. The loop will be invalidated.
1106   void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
1107 
1108   /// Fully or partially unroll a loop. How the loop is unrolled is determined
1109   /// by LLVM's LoopUnrollPass.
1110   ///
1111   /// \param DL   Debug location for instructions added by unrolling.
1112   /// \param Loop The loop to unroll. The loop will be invalidated.
1113   void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
1114 
1115   /// Partially unroll a loop.
1116   ///
1117   /// The CanonicalLoopInfo of the unrolled loop for use with chained
1118   /// loop-associated directive can be requested using \p UnrolledCLI. Not
1119   /// needing the CanonicalLoopInfo allows more efficient code generation by
1120   /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1121   /// A loop-associated directive applied to the unrolled loop needs to know the
1122   /// new trip count which means that if using a heuristically determined unroll
1123   /// factor (\p Factor == 0), that factor must be computed immediately. We are
1124   /// using the same logic as the LoopUnrollPass to derive the unroll factor,
1125   /// which however assumes that some canonicalization has taken place (e.g.
1126   /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1127   /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1128   ///
1129   /// \param DL          Debug location for instructions added by unrolling.
1130   /// \param Loop        The loop to unroll. The loop will be invalidated.
1131   /// \param Factor      The factor to unroll the loop by. A factor of 0
1132   ///                    indicates that a heuristic should be used to determine
1133   ///                    the unroll-factor.
1134   /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1135   ///                    partially unrolled loop. Otherwise, uses loop metadata
1136   ///                    to defer unrolling to the LoopUnrollPass.
1137   void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
1138                          CanonicalLoopInfo **UnrolledCLI);
1139 
1140   /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
1141   /// is cloned. The metadata which prevents vectorization is added to
1142   /// the cloned loop. The cloned loop is executed when \p IfCond is evaluated
1143   /// to false.
1144   ///
1145   /// \param Loop        The loop to simd-ize.
1146   /// \param AlignedVars The map which contains pairs of the pointer
1147   ///                    and its corresponding alignment.
1148   /// \param IfCond      The value which corresponds to the if clause
1149   ///                    condition.
1150   /// \param Order       The enum to map order clause.
1151   /// \param Simdlen     The Simdlen length to apply to the simd loop.
1152   /// \param Safelen     The Safelen length to apply to the simd loop.
1153   void applySimd(CanonicalLoopInfo *Loop,
1154                  MapVector<Value *, Value *> AlignedVars, Value *IfCond,
1155                  omp::OrderKind Order, ConstantInt *Simdlen,
1156                  ConstantInt *Safelen);
1157 
1158   /// Generator for the OpenMP 'flush' directive.
1159   ///
1160   /// \param Loc The location where the flush directive was encountered.
1161   void createFlush(const LocationDescription &Loc);
1162 
1163   /// Generator for the OpenMP 'taskwait' directive.
1164   ///
1165   /// \param Loc The location where the taskwait directive was encountered.
1166   void createTaskwait(const LocationDescription &Loc);
1167 
1168   /// Generator for the OpenMP 'taskyield' directive.
1169   ///
1170   /// \param Loc The location where the taskyield directive was encountered.
1171   void createTaskyield(const LocationDescription &Loc);
1172 
1173   /// A struct to pack the relevant information for an OpenMP depend clause.
1174   struct DependData {
1175     omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
1176     Type *DepValueType = nullptr; // Null when default-constructed.
1177     Value *DepVal = nullptr;      // Null when default-constructed.
1178     explicit DependData() = default;
1179     DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
1180                Value *DepVal)
1181         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
1182   };
1183 
1184   /// Generator for `#omp task`
1185   ///
1186   /// \param Loc The location where the task construct was encountered.
1187   /// \param AllocaIP The insertion point to be used for alloca instructions.
1188   /// \param BodyGenCB Callback that will generate the region code.
1189   /// \param Tied True if the task is tied, false if the task is untied.
1190   /// \param Final i1 value which is `true` if the task is final, `false` if the
1191   ///              task is not final.
1192   /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1193   ///     task is generated, and the encountering thread must suspend the
1194   ///     current task region, for which execution cannot be resumed until
1195   ///     execution of the structured block that is associated with the
1196   ///     generated task is completed.
1197   /// \param Dependencies DependData entries describing the depend clause(s).
1198   InsertPointTy createTask(const LocationDescription &Loc,
1199                            InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
1200                            bool Tied = true, Value *Final = nullptr,
1201                            Value *IfCondition = nullptr,
1202                            SmallVector<DependData> Dependencies = {});
1203 
1204   /// Generator for the OpenMP 'taskgroup' construct.
1205   ///
1206   /// \param Loc The location where the taskgroup construct was encountered.
1207   /// \param AllocaIP The insertion point to be used for alloca instructions.
1208   /// \param BodyGenCB Callback that will generate the region code.
1209   InsertPointTy createTaskgroup(const LocationDescription &Loc,
1210                                 InsertPointTy AllocaIP,
1211                                 BodyGenCallbackTy BodyGenCB);
1212 
/// Callback type that takes no arguments and returns a
/// (std::string, uint64_t) tuple. getTargetEntryUniqueInfo uses it to obtain
/// the file name and line number of a target entry.
using FileIdentifierInfoCallbackTy =
    std::function<std::tuple<std::string, uint64_t>()>;
1215 
1216   /// Creates a unique info for a target entry when provided a filename and
1217   /// line number from.
1218   ///
1219   /// \param CallBack A callback function which should return filename the entry
1220   /// resides in as well as the line number for the target entry
1221   /// \param ParentName The name of the parent the target entry resides in, if
1222   /// any.
1223   static TargetRegionEntryInfo
1224   getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
1225                            StringRef ParentName = "");
1226 
1227   /// Functions used to generate reductions. Such functions take two Values
1228   /// representing LHS and RHS of the reduction, respectively, and a reference
1229   /// to the value that is updated to refer to the reduction result.
1230   using ReductionGenTy =
1231       function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
1232 
1233   /// Functions used to generate atomic reductions. Such functions take two
1234   /// Values representing pointers to LHS and RHS of the reduction, as well as
1235   /// the element type of these pointers. They are expected to atomically
1236   /// update the LHS to the reduced value.
1237   using AtomicReductionGenTy =
1238       function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
1239 
1240   /// Information about an OpenMP reduction.
1241   struct ReductionInfo {
ReductionInfoReductionInfo1242     ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
1243                   ReductionGenTy ReductionGen,
1244                   AtomicReductionGenTy AtomicReductionGen)
1245         : ElementType(ElementType), Variable(Variable),
1246           PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
1247           AtomicReductionGen(AtomicReductionGen) {}
1248 
1249     /// Reduction element type, must match pointee type of variable.
1250     Type *ElementType;
1251 
1252     /// Reduction variable of pointer type.
1253     Value *Variable;
1254 
1255     /// Thread-private partial reduction variable.
1256     Value *PrivateVariable;
1257 
1258     /// Callback for generating the reduction body. The IR produced by this will
1259     /// be used to combine two values in a thread-safe context, e.g., under
1260     /// lock or within the same thread, and therefore need not be atomic.
1261     ReductionGenTy ReductionGen;
1262 
1263     /// Callback for generating the atomic reduction body, may be null. The IR
1264     /// produced by this will be used to atomically combine two values during
1265     /// reduction. If null, the implementation will use the non-atomic version
1266     /// along with the appropriate synchronization mechanisms.
1267     AtomicReductionGenTy AtomicReductionGen;
1268   };
1269 
1270   // TODO: provide atomic and non-atomic reduction generators for reduction
1271   // operators defined by the OpenMP specification.
1272 
1273   /// Generator for '#omp reduction'.
1274   ///
1275   /// Emits the IR instructing the runtime to perform the specific kind of
1276   /// reductions. Expects reduction variables to have been privatized and
1277   /// initialized to reduction-neutral values separately. Emits the calls to
1278   /// runtime functions as well as the reduction function and the basic blocks
1279   /// performing the reduction atomically and non-atomically.
1280   ///
1281   /// The code emitted for the following:
1282   ///
1283   /// \code
1284   ///   type var_1;
1285   ///   type var_2;
1286   ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1287   ///   /* body */;
1288   /// \endcode
1289   ///
1290   /// corresponds to the following sketch.
1291   ///
1292   /// \code
1293   /// void _outlined_par() {
1294   ///   // N is the number of different reductions.
1295   ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1296   ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1297   ///                        _omp_reduction_func,
1298   ///                        _gomp_critical_user.reduction.var)) {
1299   ///   case 1: {
1300   ///     var_1 = var_1 <reduction-op> privatized_var_1;
1301   ///     var_2 = var_2 <reduction-op> privatized_var_2;
1302   ///     // ...
1303   ///    __kmpc_end_reduce(...);
1304   ///     break;
1305   ///   }
1306   ///   case 2: {
1307   ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
1308   ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
1309   ///     // ...
1310   ///     break;
1311   ///   }
1312   ///   default: break;
1313   ///   }
1314   /// }
1315   ///
1316   /// void _omp_reduction_func(void **lhs, void **rhs) {
1317   ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1318   ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1319   ///   // ...
1320   /// }
1321   /// \endcode
1322   ///
1323   /// \param Loc                The location where the reduction was
1324   ///                           encountered. Must be within the associate
1325   ///                           directive and after the last local access to the
1326   ///                           reduction variables.
1327   /// \param AllocaIP           An insertion point suitable for allocas usable
1328   ///                           in reductions.
1329   /// \param ReductionInfos     A list of info on each reduction variable.
1330   /// \param IsNoWait           A flag set if the reduction is marked as nowait.
1331   InsertPointTy createReductions(const LocationDescription &Loc,
1332                                  InsertPointTy AllocaIP,
1333                                  ArrayRef<ReductionInfo> ReductionInfos,
1334                                  bool IsNoWait = false);
1335 
1336   ///}
1337 
1338   /// Return the insertion point used by the underlying IRBuilder.
getInsertionPoint()1339   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
1340 
1341   /// Update the internal location to \p Loc.
updateToLocation(const LocationDescription & Loc)1342   bool updateToLocation(const LocationDescription &Loc) {
1343     Builder.restoreIP(Loc.IP);
1344     Builder.SetCurrentDebugLocation(Loc.DL);
1345     return Loc.IP.getBlock() != nullptr;
1346   }
1347 
1348   /// Return the function declaration for the runtime function with \p FnID.
1349   FunctionCallee getOrCreateRuntimeFunction(Module &M,
1350                                             omp::RuntimeFunction FnID);
1351 
1352   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
1353 
1354   /// Return the (LLVM-IR) string describing the source location \p LocStr.
1355   Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1356 
1357   /// Return the (LLVM-IR) string describing the default source location.
1358   Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
1359 
1360   /// Return the (LLVM-IR) string describing the source location identified by
1361   /// the arguments.
1362   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
1363                                  unsigned Line, unsigned Column,
1364                                  uint32_t &SrcLocStrSize);
1365 
1366   /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
1367   /// fallback if \p DL does not specify the function name.
1368   Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
1369                                  Function *F = nullptr);
1370 
1371   /// Return the (LLVM-IR) string describing the source location \p Loc.
1372   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
1373                                  uint32_t &SrcLocStrSize);
1374 
1375   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
1376   /// TODO: Create a enum class for the Reserve2Flags
1377   Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
1378                              omp::IdentFlag Flags = omp::IdentFlag(0),
1379                              unsigned Reserve2Flags = 0);
1380 
1381   /// Create a hidden global flag \p Name in the module with initial value \p
1382   /// Value.
1383   GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
1384 
1385   /// Generate control flow and cleanup for cancellation.
1386   ///
1387   /// \param CancelFlag Flag indicating if the cancellation is performed.
1388   /// \param CanceledDirective The kind of directive that is cancled.
1389   /// \param ExitCB Extra code to be generated in the exit block.
1390   void emitCancelationCheckImpl(Value *CancelFlag,
1391                                 omp::Directive CanceledDirective,
1392                                 FinalizeCallbackTy ExitCB = {});
1393 
1394   /// Generate a target region entry call.
1395   ///
1396   /// \param Loc The location at which the request originated and is fulfilled.
1397   /// \param AllocaIP The insertion point to be used for alloca instructions.
1398   /// \param Return Return value of the created function returned by reference.
1399   /// \param DeviceID Identifier for the device via the 'device' clause.
1400   /// \param NumTeams Numer of teams for the region via the 'num_teams' clause
1401   ///                 or 0 if unspecified and -1 if there is no 'teams' clause.
1402   /// \param NumThreads Number of threads via the 'thread_limit' clause.
1403   /// \param HostPtr Pointer to the host-side pointer of the target kernel.
1404   /// \param KernelArgs Array of arguments to the kernel.
1405   InsertPointTy emitTargetKernel(const LocationDescription &Loc,
1406                                  InsertPointTy AllocaIP, Value *&Return,
1407                                  Value *Ident, Value *DeviceID, Value *NumTeams,
1408                                  Value *NumThreads, Value *HostPtr,
1409                                  ArrayRef<Value *> KernelArgs);
1410 
1411   /// Generate a barrier runtime call.
1412   ///
1413   /// \param Loc The location at which the request originated and is fulfilled.
1414   /// \param DK The directive which caused the barrier
1415   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1416   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1417   ///                        should be checked and acted upon.
1418   ///
1419   /// \returns The insertion point after the barrier.
1420   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1421                                 omp::Directive DK, bool ForceSimpleCall,
1422                                 bool CheckCancelFlag);
1423 
1424   /// Generate a flush runtime call.
1425   ///
1426   /// \param Loc The location at which the request originated and is fulfilled.
1427   void emitFlush(const LocationDescription &Loc);
1428 
1429   /// The finalization stack made up of finalize callbacks currently in-flight,
1430   /// wrapped into FinalizationInfo objects that reference also the finalization
1431   /// target block and the kind of cancellable directive.
1432   SmallVector<FinalizationInfo, 8> FinalizationStack;
1433 
1434   /// Return true if the last entry in the finalization stack is of kind \p DK
1435   /// and cancellable.
isLastFinalizationInfoCancellable(omp::Directive DK)1436   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1437     return !FinalizationStack.empty() &&
1438            FinalizationStack.back().IsCancellable &&
1439            FinalizationStack.back().DK == DK;
1440   }
1441 
1442   /// Generate a taskwait runtime call.
1443   ///
1444   /// \param Loc The location at which the request originated and is fulfilled.
1445   void emitTaskwaitImpl(const LocationDescription &Loc);
1446 
1447   /// Generate a taskyield runtime call.
1448   ///
1449   /// \param Loc The location at which the request originated and is fulfilled.
1450   void emitTaskyieldImpl(const LocationDescription &Loc);
1451 
1452   /// Return the current thread ID.
1453   ///
1454   /// \param Ident The ident (ident_t*) describing the query origin.
1455   Value *getOrCreateThreadID(Value *Ident);
1456 
1457   /// The OpenMPIRBuilder Configuration
1458   OpenMPIRBuilderConfig Config;
1459 
1460   /// The underlying LLVM-IR module
1461   Module &M;
1462 
1463   /// The LLVM-IR Builder used to create IR.
1464   IRBuilder<> Builder;
1465 
1466   /// Map to remember source location strings
1467   StringMap<Constant *> SrcLocStrMap;
1468 
1469   /// Map to remember existing ident_t*.
1470   DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
1471 
1472   /// Info manager to keep track of target regions.
1473   OffloadEntriesInfoManager OffloadInfoManager;
1474 
1475   /// The target triple of the underlying module.
1476   const Triple T;
1477 
1478   /// Helper that contains information about regions we need to outline
1479   /// during finalization.
1480   struct OutlineInfo {
1481     using PostOutlineCBTy = std::function<void(Function &)>;
1482     PostOutlineCBTy PostOutlineCB;
1483     BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
1484     SmallVector<Value *, 2> ExcludeArgsFromAggregate;
1485 
1486     /// Collect all blocks in between EntryBB and ExitBB in both the given
1487     /// vector and set.
1488     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
1489                        SmallVectorImpl<BasicBlock *> &BlockVector);
1490 
1491     /// Return the function that contains the region to be outlined.
getFunctionOutlineInfo1492     Function *getFunction() const { return EntryBB->getParent(); }
1493   };
1494 
1495   /// Collection of regions that need to be outlined during finalization.
1496   SmallVector<OutlineInfo, 16> OutlineInfos;
1497 
1498   /// Collection of owned canonical loop objects that eventually need to be
1499   /// free'd.
1500   std::forward_list<CanonicalLoopInfo> LoopInfos;
1501 
1502   /// Add a new region that will be outlined later.
addOutlineInfo(OutlineInfo && OI)1503   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
1504 
1505   /// An ordered map of auto-generated variables to their unique names.
1506   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1507   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1508   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1509   /// variables.
1510   StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars;
1511 
1512   /// Computes the size of type in bytes.
1513   Value *getSizeInBytes(Value *BasePtr);
1514 
1515   // Emit a branch from the current block to the Target block only if
1516   // the current block has a terminator.
1517   void emitBranch(BasicBlock *Target);
1518 
1519   // If BB has no use then delete it and return. Else place BB after the current
1520   // block, if possible, or else at the end of the function. Also add a branch
1521   // from current block to BB if current block does not have a terminator.
1522   void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1523 
1524   /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
1525   /// Here is the logic:
1526   /// if (Cond) {
1527   ///   ThenGen();
1528   /// } else {
1529   ///   ElseGen();
1530   /// }
1531   void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
1532                     BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1533 
1534   /// Create the global variable holding the offload mappings information.
1535   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
1536                                         std::string VarName);
1537 
1538   /// Create the global variable holding the offload names information.
1539   GlobalVariable *
1540   createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
1541                         std::string VarName);
1542 
1543   struct MapperAllocas {
1544     AllocaInst *ArgsBase = nullptr;
1545     AllocaInst *Args = nullptr;
1546     AllocaInst *ArgSizes = nullptr;
1547   };
1548 
1549   /// Create the allocas instruction used in call to mapper functions.
1550   void createMapperAllocas(const LocationDescription &Loc,
1551                            InsertPointTy AllocaIP, unsigned NumOperands,
1552                            struct MapperAllocas &MapperAllocas);
1553 
1554   /// Create the call for the target mapper function.
1555   /// \param Loc The source location description.
1556   /// \param MapperFunc Function to be called.
1557   /// \param SrcLocInfo Source location information global.
1558   /// \param MaptypesArg The argument types.
1559   /// \param MapnamesArg The argument names.
1560   /// \param MapperAllocas The AllocaInst used for the call.
1561   /// \param DeviceID Device ID for the call.
1562   /// \param NumOperands Number of operands in the call.
1563   void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1564                       Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1565                       struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1566                       unsigned NumOperands);
1567 
1568   /// Container for the arguments used to pass data to the runtime library.
1569   struct TargetDataRTArgs {
1570     /// The array of base pointer passed to the runtime library.
1571     Value *BasePointersArray = nullptr;
1572     /// The array of section pointers passed to the runtime library.
1573     Value *PointersArray = nullptr;
1574     /// The array of sizes passed to the runtime library.
1575     Value *SizesArray = nullptr;
1576     /// The array of map types passed to the runtime library for the beginning
1577     /// of the region or for the entire region if there are no separate map
1578     /// types for the region end.
1579     Value *MapTypesArray = nullptr;
1580     /// The array of map types passed to the runtime library for the end of the
1581     /// region, or nullptr if there are no separate map types for the region
1582     /// end.
1583     Value *MapTypesArrayEnd = nullptr;
1584     /// The array of user-defined mappers passed to the runtime library.
1585     Value *MappersArray = nullptr;
1586     /// The array of original declaration names of mapped pointers sent to the
1587     /// runtime library for debugging
1588     Value *MapNamesArray = nullptr;
1589 
TargetDataRTArgsTargetDataRTArgs1590     explicit TargetDataRTArgs() {}
TargetDataRTArgsTargetDataRTArgs1591     explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray,
1592                               Value *SizesArray, Value *MapTypesArray,
1593                               Value *MapTypesArrayEnd, Value *MappersArray,
1594                               Value *MapNamesArray)
1595         : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
1596           SizesArray(SizesArray), MapTypesArray(MapTypesArray),
1597           MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray),
1598           MapNamesArray(MapNamesArray) {}
1599   };
1600 
1601   /// Data structure that contains the needed information to construct the
1602   /// kernel args vector.
1603   struct TargetKernelArgs {
1604     /// Number of arguments passed to the runtime library.
1605     unsigned NumTargetItems;
1606     /// Arguments passed to the runtime library
1607     TargetDataRTArgs RTArgs;
1608     /// The number of iterations
1609     Value *NumIterations;
1610     /// The number of teams.
1611     Value *NumTeams;
1612     /// The number of threads.
1613     Value *NumThreads;
1614     /// The size of the dynamic shared memory.
1615     Value *DynCGGroupMem;
1616     /// True if the kernel has 'no wait' clause.
1617     bool HasNoWait;
1618 
1619     /// Constructor for TargetKernelArgs
TargetKernelArgsTargetKernelArgs1620     TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs,
1621                      Value *NumIterations, Value *NumTeams, Value *NumThreads,
1622                      Value *DynCGGroupMem, bool HasNoWait)
1623         : NumTargetItems(NumTargetItems), RTArgs(RTArgs),
1624           NumIterations(NumIterations), NumTeams(NumTeams),
1625           NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem),
1626           HasNoWait(HasNoWait) {}
1627   };
1628 
1629   /// Create the kernel args vector used by emitTargetKernel. This function
1630   /// creates various constant values that are used in the resulting args
1631   /// vector.
1632   static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
1633                                   IRBuilderBase &Builder,
1634                                   SmallVector<Value *> &ArgsVector);
1635 
1636   /// Struct that keeps the information that should be kept throughout
1637   /// a 'target data' region.
1638   class TargetDataInfo {
1639     /// Set to true if device pointer information have to be obtained.
1640     bool RequiresDevicePointerInfo = false;
1641     /// Set to true if Clang emits separate runtime calls for the beginning and
1642     /// end of the region.  These calls might have separate map type arrays.
1643     bool SeparateBeginEndCalls = false;
1644 
1645   public:
1646     TargetDataRTArgs RTArgs;
1647 
1648     SmallMapVector<const Value *, std::pair<Value *, Value *>, 4>
1649         DevicePtrInfoMap;
1650 
1651     /// Indicate whether any user-defined mapper exists.
1652     bool HasMapper = false;
1653     /// The total number of pointers passed to the runtime library.
1654     unsigned NumberOfPtrs = 0u;
1655 
TargetDataInfo()1656     explicit TargetDataInfo() {}
TargetDataInfo(bool RequiresDevicePointerInfo,bool SeparateBeginEndCalls)1657     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1658                             bool SeparateBeginEndCalls)
1659         : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1660           SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1661     /// Clear information about the data arrays.
clearArrayInfo()1662     void clearArrayInfo() {
1663       RTArgs = TargetDataRTArgs();
1664       HasMapper = false;
1665       NumberOfPtrs = 0u;
1666     }
1667     /// Return true if the current target data information has valid arrays.
isValid()1668     bool isValid() {
1669       return RTArgs.BasePointersArray && RTArgs.PointersArray &&
1670              RTArgs.SizesArray && RTArgs.MapTypesArray &&
1671              (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
1672     }
requiresDevicePointerInfo()1673     bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
separateBeginEndCalls()1674     bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1675   };
1676 
/// Kinds of device information; the element type of MapDeviceInfoArrayTy.
enum class DeviceInfoTy {
  None,
  Pointer,
  Address
};
1678   using MapValuesArrayTy = SmallVector<Value *, 4>;
1679   using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>;
1680   using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>;
1681   using MapNamesArrayTy = SmallVector<Constant *, 4>;
1682   using MapDimArrayTy = SmallVector<uint64_t, 4>;
1683   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
1684 
1685   /// This structure contains combined information generated for mappable
1686   /// clauses, including base pointers, pointers, sizes, map types, user-defined
1687   /// mappers, and non-contiguous information.
1688   struct MapInfosTy {
1689     struct StructNonContiguousInfo {
1690       bool IsNonContiguous = false;
1691       MapDimArrayTy Dims;
1692       MapNonContiguousArrayTy Offsets;
1693       MapNonContiguousArrayTy Counts;
1694       MapNonContiguousArrayTy Strides;
1695     };
1696     MapValuesArrayTy BasePointers;
1697     MapValuesArrayTy Pointers;
1698     MapDeviceInfoArrayTy DevicePointers;
1699     MapValuesArrayTy Sizes;
1700     MapFlagsArrayTy Types;
1701     MapNamesArrayTy Names;
1702     StructNonContiguousInfo NonContigInfo;
1703 
1704     /// Append arrays in \a CurInfo.
appendMapInfosTy1705     void append(MapInfosTy &CurInfo) {
1706       BasePointers.append(CurInfo.BasePointers.begin(),
1707                           CurInfo.BasePointers.end());
1708       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1709       DevicePointers.append(CurInfo.DevicePointers.begin(),
1710                             CurInfo.DevicePointers.end());
1711       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1712       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1713       Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1714       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
1715                                 CurInfo.NonContigInfo.Dims.end());
1716       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
1717                                    CurInfo.NonContigInfo.Offsets.end());
1718       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
1719                                   CurInfo.NonContigInfo.Counts.end());
1720       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
1721                                    CurInfo.NonContigInfo.Strides.end());
1722     }
1723   };
1724 
1725   /// Callback function type for functions emitting the host fallback code that
1726   /// is executed when the kernel launch fails. It takes an insertion point as
1727   /// parameter where the code should be emitted. It returns an insertion point
1728   /// that points right after after the emitted code.
1729   using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>;
1730 
1731   /// Generate a target region entry call and host fallback call.
1732   ///
1733   /// \param Loc The location at which the request originated and is fulfilled.
1734   /// \param OutlinedFn The outlined kernel function.
1735   /// \param OutlinedFnID The ooulined function ID.
1736   /// \param EmitTargetCallFallbackCB Call back function to generate host
1737   ///        fallback code.
1738   /// \param Args Data structure holding information about the kernel arguments.
1739   /// \param DeviceID Identifier for the device via the 'device' clause.
1740   /// \param RTLoc Source location identifier
1741   /// \param AllocaIP The insertion point to be used for alloca instructions.
1742   InsertPointTy emitKernelLaunch(
1743       const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
1744       EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1745       Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1746 
1747   /// Emit the arguments to be passed to the runtime library based on the
1748   /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
1749   /// ForEndCall, emit map types to be passed for the end of the region instead
1750   /// of the beginning.
1751   void emitOffloadingArraysArgument(IRBuilderBase &Builder,
1752                                     OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
1753                                     OpenMPIRBuilder::TargetDataInfo &Info,
1754                                     bool EmitDebug = false,
1755                                     bool ForEndCall = false);
1756 
1757   /// Emit an array of struct descriptors to be assigned to the offload args.
1758   void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
1759                                    InsertPointTy CodeGenIP,
1760                                    MapInfosTy &CombinedInfo,
1761                                    TargetDataInfo &Info);
1762 
1763   /// Emit the arrays used to pass the captures and map information to the
1764   /// offloading runtime library. If there is no map or capture information,
1765   /// return nullptr by reference.
1766   void emitOffloadingArrays(
1767       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
1768       TargetDataInfo &Info, bool IsNonContiguous = false,
1769       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
1770       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1771 
1772   /// Creates offloading entry for the provided entry ID \a ID, address \a
1773   /// Addr, size \a Size, and flags \a Flags.
1774   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
1775                           int32_t Flags, GlobalValue::LinkageTypes,
1776                           StringRef Name = "");
1777 
/// The kinds of errors that can occur while emitting the offload entries and
/// metadata. The values are spelled out; they match the implicit numbering.
enum EmitMetadataErrorKind {
  EMIT_MD_TARGET_REGION_ERROR = 0,
  EMIT_MD_DECLARE_TARGET_ERROR = 1,
  EMIT_MD_GLOBAL_VAR_LINK_ERROR = 2
};
1785 
1786   /// Callback function type
1787   using EmitMetadataErrorReportFunctionTy =
1788       std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1789 
1790   // Emit the offloading entries and metadata so that the device codegen side
1791   // can easily figure out what to emit. The produced metadata looks like
1792   // this:
1793   //
1794   // !omp_offload.info = !{!1, ...}
1795   //
1796   // We only generate metadata for function that contain target regions.
1797   void createOffloadEntriesAndInfoMetadata(
1798       EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1799 
1800 public:
1801   /// Generator for __kmpc_copyprivate
1802   ///
1803   /// \param Loc The source location description.
1804   /// \param BufSize Number of elements in the buffer.
1805   /// \param CpyBuf List of pointers to data to be copied.
1806   /// \param CpyFn function to call for copying data.
1807   /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1808   ///
1809   /// \return The insertion position *after* the CopyPrivate call.
1810 
1811   InsertPointTy createCopyPrivate(const LocationDescription &Loc,
1812                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
1813                                   llvm::Value *CpyFn, llvm::Value *DidIt);
1814 
1815   /// Generator for '#omp single'
1816   ///
1817   /// \param Loc The source location description.
1818   /// \param BodyGenCB Callback that will generate the region code.
1819   /// \param FiniCB Callback to finalize variable copies.
1820   /// \param IsNowait If false, a barrier is emitted.
1821   /// \param DidIt Local variable used as a flag to indicate 'single' thread
1822   ///
1823   /// \returns The insertion position *after* the single call.
1824   InsertPointTy createSingle(const LocationDescription &Loc,
1825                              BodyGenCallbackTy BodyGenCB,
1826                              FinalizeCallbackTy FiniCB, bool IsNowait,
1827                              llvm::Value *DidIt);
1828 
1829   /// Generator for '#omp master'
1830   ///
1831   /// \param Loc The insert and source location description.
1832   /// \param BodyGenCB Callback that will generate the region code.
1833   /// \param FiniCB Callback to finalize variable copies.
1834   ///
1835   /// \returns The insertion position *after* the master.
1836   InsertPointTy createMaster(const LocationDescription &Loc,
1837                              BodyGenCallbackTy BodyGenCB,
1838                              FinalizeCallbackTy FiniCB);
1839 
1840   /// Generator for '#omp masked'
1841   ///
1842   /// \param Loc The insert and source location description.
1843   /// \param BodyGenCB Callback that will generate the region code.
1844   /// \param FiniCB Callback to finialize variable copies.
1845   ///
1846   /// \returns The insertion position *after* the masked.
1847   InsertPointTy createMasked(const LocationDescription &Loc,
1848                              BodyGenCallbackTy BodyGenCB,
1849                              FinalizeCallbackTy FiniCB, Value *Filter);
1850 
  /// Generator for '#omp critical'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region body code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param CriticalName Name of the lock used by the critical directive.
  /// \param HintInst Hint instruction for the hint clause associated with the
  ///        critical directive.
  ///
  /// \returns The insertion position *after* the critical.
  InsertPointTy createCritical(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,
                               FinalizeCallbackTy FiniCB,
                               StringRef CriticalName, Value *HintInst);
1864 
  /// Generator for '#omp ordered depend (source | sink)'
  ///
  /// \param Loc The insert and source location description.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param NumLoops The number of loops in the depend clause.
  /// \param StoreValues The values that will be stored in the vector address.
  /// \param Name The name of the alloca instruction.
  /// \param IsDependSource If true, depend source; otherwise, depend sink.
  ///
  /// \return The insertion position *after* the ordered.
  InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                    InsertPointTy AllocaIP, unsigned NumLoops,
                                    ArrayRef<llvm::Value *> StoreValues,
                                    const Twine &Name, bool IsDependSource);
1879 
  /// Generator for '#omp ordered [threads | simd]'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param IsThreads If true, with threads clause or without clause;
  ///        otherwise, with simd clause.
  ///
  /// \returns The insertion position *after* the ordered.
  InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                         BodyGenCallbackTy BodyGenCB,
                                         FinalizeCallbackTy FiniCB,
                                         bool IsThreads);
1893 
  /// Generator for '#omp sections'
  ///
  /// \param Loc The insert and source location description.
  /// \param AllocaIP The insertion points to be used for alloca instructions.
  /// \param SectionCBs Callbacks that will generate body of each section.
  /// \param PrivCB Callback to copy a given variable (think copy constructor).
  /// \param FiniCB Callback to finalize variable copies.
  /// \param IsCancellable Flag to indicate a cancellable parallel region.
  /// \param IsNowait If true, the barrier that ensures all sections are
  ///        executed before moving forward will not be generated.
  ///
  /// \returns The insertion position *after* the sections.
  InsertPointTy createSections(const LocationDescription &Loc,
                               InsertPointTy AllocaIP,
                               ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                               PrivatizeCallbackTy PrivCB,
                               FinalizeCallbackTy FiniCB, bool IsCancellable,
                               bool IsNowait);
1911 
  /// Generator for '#omp section'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region body code.
  /// \param FiniCB Callback to finalize variable copies.
  ///
  /// \returns The insertion position *after* the section.
  InsertPointTy createSection(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB);
1921 
  /// Generator for `#omp teams`
  ///
  /// \param Loc The location where the teams construct was encountered.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
  ///        it is as if lower bound is specified as equal to upperbound. If
  ///        this is non-null, then upperbound must also be non-null.
  /// \param NumTeamsUpper Upper bound on the number of teams.
  /// \param ThreadLimit Limit on the number of threads that may participate
  ///        in a contention group created by each team.
  /// \param IfExpr The integer argument value of the if condition on the
  ///        teams clause.
  InsertPointTy
  createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
              Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
              Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
1938 
  /// Generate conditional branch and relevant BasicBlocks through which private
  /// threads copy the 'copyin' variables from Master copy to threadprivate
  /// copies.
  ///
  /// \param IP Insertion block for copyin conditional.
  /// \param MasterAddr A pointer to the master variable.
  /// \param PrivateAddr A pointer to the threadprivate variable.
  /// \param IntPtrTy Pointer size type.
  /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
  ///        and copy.in.end block.
  ///
  /// \returns The insertion point where the copying operation is to be
  ///          emitted.
  InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                         Value *PrivateAddr,
                                         llvm::IntegerType *IntPtrTy,
                                         bool BranchtoEnd = true);
1955 
  /// Create a runtime call for kmpc_alloc
  ///
  /// \param Loc The insert and source location description.
  /// \param Size Size of allocated memory space
  /// \param Allocator Allocator information instruction
  /// \param Name Name of call Instruction for OMP_alloc (defaults to an empty
  ///        name)
  ///
  /// \returns CallInst to the OMP_alloc call
  CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                           Value *Allocator, std::string Name = "");
1966 
  /// Create a runtime call for kmpc_free
  ///
  /// \param Loc The insert and source location description.
  /// \param Addr Address of memory space to be freed
  /// \param Allocator Allocator information instruction
  /// \param Name Name of call Instruction for OMP_Free (defaults to an empty
  ///        name)
  ///
  /// \returns CallInst to the OMP_Free call
  CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                          Value *Allocator, std::string Name = "");
1977 
  /// Create a runtime call for kmpc_threadprivate_cached
  ///
  /// \param Loc The insert and source location description.
  /// \param Pointer Pointer to data to be cached.
  /// \param Size Size of data to be cached.
  /// \param Name Name of the call instruction (defaults to an empty name).
  ///
  /// \returns CallInst to the thread private cache call.
  CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                      llvm::Value *Pointer,
                                      llvm::ConstantInt *Size,
                                      const llvm::Twine &Name = Twine(""));
1990 
  /// Create a runtime call for __tgt_interop_init
  ///
  /// \param Loc The insert and source location description.
  /// \param InteropVar variable to be allocated
  /// \param InteropType type of interop operation
  /// \param Device device to which offloading will occur
  /// \param NumDependences number of dependence variables
  /// \param DependenceAddress pointer to dependence variables
  /// \param HaveNowaitClause does nowait clause exist
  ///
  /// \returns CallInst to the __tgt_interop_init call
  CallInst *createOMPInteropInit(const LocationDescription &Loc,
                                 Value *InteropVar,
                                 omp::OMPInteropType InteropType, Value *Device,
                                 Value *NumDependences,
                                 Value *DependenceAddress,
                                 bool HaveNowaitClause);
2008 
  /// Create a runtime call for __tgt_interop_destroy
  ///
  /// \param Loc The insert and source location description.
  /// \param InteropVar variable to be allocated
  /// \param Device device to which offloading will occur
  /// \param NumDependences number of dependence variables
  /// \param DependenceAddress pointer to dependence variables
  /// \param HaveNowaitClause does nowait clause exist
  ///
  /// \returns CallInst to the __tgt_interop_destroy call
  CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                    Value *InteropVar, Value *Device,
                                    Value *NumDependences,
                                    Value *DependenceAddress,
                                    bool HaveNowaitClause);
2024 
  /// Create a runtime call for __tgt_interop_use
  ///
  /// \param Loc The insert and source location description.
  /// \param InteropVar variable to be allocated
  /// \param Device device to which offloading will occur
  /// \param NumDependences number of dependence variables
  /// \param DependenceAddress pointer to dependence variables
  /// \param HaveNowaitClause does nowait clause exist
  ///
  /// \returns CallInst to the __tgt_interop_use call
  CallInst *createOMPInteropUse(const LocationDescription &Loc,
                                Value *InteropVar, Value *Device,
                                Value *NumDependences, Value *DependenceAddress,
                                bool HaveNowaitClause);
2039 
2040   /// The `omp target` interface
2041   ///
2042   /// For more information about the usage of this interface,
2043   /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
2044   ///
2045   ///{
2046 
  /// Create a runtime call for kmpc_target_init
  ///
  /// \param Loc The insert and source location description.
  /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
  /// \param MinThreadsVal Minimal number of threads, or 0.
  /// \param MaxThreadsVal Maximal number of threads, or 0.
  /// \param MinTeamsVal Minimal number of teams, or 0.
  /// \param MaxTeamsVal Maximal number of teams, or 0.
  InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
                                 int32_t MinThreadsVal = 0,
                                 int32_t MaxThreadsVal = 0,
                                 int32_t MinTeamsVal = 0,
                                 int32_t MaxTeamsVal = 0);
2060 
  /// Create a runtime call for kmpc_target_deinit
  ///
  /// \param Loc The insert and source location description.
  /// \param TeamsReductionDataSize The maximal size of all the reduction data
  ///        for teams reduction (defaults to 0).
  /// \param TeamsReductionBufferLength The number of elements (each of up to
  ///        \p TeamsReductionDataSize size), in the teams reduction buffer
  ///        (defaults to 1024).
  void createTargetDeinit(const LocationDescription &Loc,
                          int32_t TeamsReductionDataSize = 0,
                          int32_t TeamsReductionBufferLength = 1024);
2071 
2072   ///}
2073 
2074   /// Helpers to read/write kernel annotations from the IR.
2075   ///
2076   ///{
2077 
  /// Read/write bounds on threads for \p Kernel. Read will return 0 if none
  /// is set. The read variant returns a pair of values (presumably the lower
  /// and upper bound, mirroring \p LB and \p UB of the write variant; TODO
  /// confirm the order).
  static std::pair<int32_t, int32_t>
  readThreadBoundsForKernel(const Triple &T, Function &Kernel);
  static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
                                         int32_t LB, int32_t UB);
2084 
  /// Read/write bounds on teams for \p Kernel. Read will return 0 if none
  /// is set. The read variant returns a pair of values (presumably the lower
  /// and upper bound, mirroring \p LB and \p UB of the write variant; TODO
  /// confirm the order).
  static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
                                                             Function &Kernel);
  static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
                                  int32_t UB);
2091   ///}
2092 
2093 private:
  // Sets the function attributes expected for the outlined function
  // \p OutlinedFn.
  void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
2096 
  // Creates the function ID/Address for the given outlined function
  // \p OutlinedFn.
  // In the case of an embedded device function the address of the function is
  // used, in the case of a non-offload function a constant is created.
  // \p EntryFnIDName is presumably the name given to the created ID (TODO
  // confirm against the implementation).
  Constant *createOutlinedFunctionID(Function *OutlinedFn,
                                     StringRef EntryFnIDName);
2102 
  // Creates the region entry address for the outlined function
  // \p OutlinedFunction.
  Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
                                        StringRef EntryFnName);
2106 
2107 public:
  /// Callback type used to generate a function with the given name. It
  /// receives the function name and returns the generated Function.
  using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2110 
  /// Create a unique name for the entry function using the source location
  /// information of the current target region. The name will be something like:
  ///
  /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
  ///
  /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  /// mangled name of the function that encloses the target region and BB is the
  /// line number of the target region. CC is a count added when more than one
  /// region is located at the same location.
  ///
  /// If this target outline function is not an offload entry, we don't need to
  /// register it. This may happen if it is guarded by an if clause that is
  /// false at compile time, or no target archs have been specified.
  ///
  /// The created target region ID is used by the runtime library to identify
  /// the current target region, so it only has to be unique and not
  /// necessarily point to anything. It could be the pointer to the outlined
  /// function that implements the target region, but we aren't using that so
  /// that the compiler doesn't need to keep that, and could therefore inline
  /// the host function if proven worthwhile during optimization. On the other
  /// hand, if emitting code for the device, the ID has to be the function
  /// address so that it can be retrieved from the offloading entry and launched
  /// by the runtime library. We also mark the outlined function to have
  /// external linkage in case we are emitting code for the device, because
  /// these functions will be entry points to the device.
  ///
  /// \param EntryInfo The entry information about the function
  /// \param GenerateFunctionCallback The callback function to generate the code
  /// \param IsOffloadEntry Whether the outlined function is an offload entry
  ///        (see the note on registration above).
  /// \param OutlinedFn Pointer to the outlined function (passed by reference;
  ///        presumably set by this function, TODO confirm).
  /// \param OutlinedFnID The ID of the outlined function (passed by reference;
  ///        presumably set by this function, TODO confirm).
  void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
                                FunctionGenCallback &GenerateFunctionCallback,
                                bool IsOffloadEntry, Function *&OutlinedFn,
                                Constant *&OutlinedFnID);
2146 
  /// Registers the given function and sets up the attributes of the function.
  /// Returns the FunctionID.
  ///
  /// \param EntryInfo The entry information about the function
  /// \param OutlinedFunction Pointer to the outlined function
  /// \param EntryFnName Name of the outlined function
  /// \param EntryFnIDName Name of the ID to be created
  Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
                                         Function *OutlinedFunction,
                                         StringRef EntryFnName,
                                         StringRef EntryFnIDName);
2159 
  /// Type of BodyGen to use for region codegen
  ///
  /// - Priv: If device pointer privatization is required, emit the body of the
  ///   region here. It will have to be duplicated: with and without
  ///   privatization.
  /// - DupNoPriv: If we need device pointer privatization, we need to emit the
  ///   body of the region with no privatization in the 'else' branch of the
  ///   conditional.
  /// - NoPriv: If we don't require privatization of device pointers, we emit
  ///   the body in between the runtime calls. This avoids duplicating the body
  ///   code.
  enum BodyGenTy { Priv, DupNoPriv, NoPriv };
2172 
  /// Callback type for creating the map infos for the kernel parameters.
  /// \param CodeGenIP is the insertion point where code should be generated,
  ///        if any.
  /// \returns A reference to the resulting MapInfosTy.
  using GenMapInfoCallbackTy =
      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
2178 
  /// Generator for '#omp target data'
  ///
  /// \param Loc The location where the target data construct was encountered.
  /// \param AllocaIP The insertion points to be used for alloca instructions.
  /// \param CodeGenIP The insertion point at which the target directive code
  /// should be placed.
  /// \param DeviceID Stores the DeviceID from the device clause.
  /// \param IfCond Value which corresponds to the if clause condition.
  /// \param Info Stores all information related to the Target Data directive.
  /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
  /// \param MapperFunc Optional pointer to the runtime mapper function to call
  /// (presumably selects e.g. the begin or end mapper call for standalone
  /// data directives; TODO confirm against the implementation).
  /// \param BodyGenCB Optional Callback to generate the region code.
  /// \param DeviceAddrCB Optional callback to generate code related to
  /// use_device_ptr and use_device_addr.
  /// \param CustomMapperCB Optional callback to generate code related to
  /// custom mappers.
  /// \param SrcLocInfo Optional source location info value (exact use is not
  /// visible from this declaration; TODO confirm).
  OpenMPIRBuilder::InsertPointTy createTargetData(
      const LocationDescription &Loc, InsertPointTy AllocaIP,
      InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
      TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
      omp::RuntimeFunction *MapperFunc = nullptr,
      function_ref<InsertPointTy(InsertPointTy CodeGenIP,
                                 BodyGenTy BodyGenType)>
          BodyGenCB = nullptr,
      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
      Value *SrcLocInfo = nullptr);
2207 
  /// Callback type used by createTarget to generate the region code. It
  /// receives an alloca insertion point and a code generation insertion point
  /// and returns an insertion point.
  using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
      InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
2210 
  /// Callback type used by createTarget to generate accessor instructions for
  /// passed-in target arguments where necessary. It receives the argument
  /// \p Arg, the corresponding \p Input value, a \p RetVal reference
  /// (presumably an output set by the callback; TODO confirm), and the alloca
  /// and code generation insertion points; it returns an insertion point.
  using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointTy(
      Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
      InsertPointTy CodeGenIP)>;
2214 
  /// Generator for '#omp target'
  ///
  /// \param Loc The location where the target construct was encountered.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param CodeGenIP The insertion point where the call to the outlined
  /// function should be emitted.
  /// \param EntryInfo The entry information about the function.
  /// \param NumTeams Number of teams specified in the num_teams clause.
  /// \param NumThreads Number of threads specified in the thread_limit clause.
  /// \param Inputs The input values to the region that will be passed
  /// as arguments to the outlined function.
  /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param ArgAccessorFuncCB Callback that will generate accessors
  /// instructions for passed in target arguments where necessary
  InsertPointTy createTarget(const LocationDescription &Loc,
                             OpenMPIRBuilder::InsertPointTy AllocaIP,
                             OpenMPIRBuilder::InsertPointTy CodeGenIP,
                             TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
                             int32_t NumThreads,
                             SmallVectorImpl<Value *> &Inputs,
                             GenMapInfoCallbackTy GenMapInfoCB,
                             TargetBodyGenCallbackTy BodyGenCB,
                             TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB);
2237 
  /// Returns __kmpc_for_static_init_* runtime function for the specified
  /// size \a IVSize and sign \a IVSigned. Will create a distribute call
  /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
  ///
  /// \param IVSize Size selecting the runtime function variant.
  /// \param IVSigned Signedness selecting the runtime function variant.
  /// \param IsGPUDistribute If set, the distribute variant is returned.
  FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute);
2243 
  /// Returns __kmpc_dispatch_init_* runtime function for the specified
  /// size \a IVSize and sign \a IVSigned.
  ///
  /// \param IVSize Size selecting the runtime function variant.
  /// \param IVSigned Signedness selecting the runtime function variant.
  FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);
2247 
  /// Returns __kmpc_dispatch_next_* runtime function for the specified
  /// size \a IVSize and sign \a IVSigned.
  ///
  /// \param IVSize Size selecting the runtime function variant.
  /// \param IVSigned Signedness selecting the runtime function variant.
  FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);
2251 
  /// Returns __kmpc_dispatch_fini_* runtime function for the specified
  /// size \a IVSize and sign \a IVSigned.
  ///
  /// \param IVSize Size selecting the runtime function variant.
  /// \param IVSigned Signedness selecting the runtime function variant.
  FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
2255 
  /// Declarations for LLVM-IR types (simple, array, function and structure) are
  /// generated below. Their names are defined and used in OMPKinds.def. Here
  /// we provide the declarations, the initializeTypes function will provide the
  /// values. Every generated member is initialized to nullptr; array, function
  /// and struct entries additionally get a companion PointerType member.
  ///
  ///{
#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  ArrayType *VarName##Ty = nullptr;                                            \
  PointerType *VarName##PtrTy = nullptr;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  FunctionType *VarName = nullptr;                                             \
  PointerType *VarName##Ptr = nullptr;
#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
  StructType *VarName = nullptr;                                               \
  PointerType *VarName##Ptr = nullptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  ///}
2275 
2276 private:
  /// Create all simple and struct types exposed by the runtime and remember
  /// the llvm::PointerTypes of them for easy access later.
  ///
  /// \param M The module (presumably the one the types are created for; TODO
  ///        confirm against the implementation).
  void initializeTypes(Module &M);
2280 
  /// Common interface for generating entry calls for OMP Directives.
  /// If the directive has a region/body, it will set the insertion
  /// point to the body.
  ///
  /// \param OMPD Directive to generate entry blocks for
  /// \param EntryCall Call to the entry OMP Runtime Function
  /// \param ExitBB block where the region ends.
  /// \param Conditional indicate if the entry call result will be used
  ///        to evaluate a conditional of whether a thread will execute
  ///        body code or not.
  ///
  /// \return The insertion position in exit block
  InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
                                         BasicBlock *ExitBB,
                                         bool Conditional = false);
2296 
  /// Common interface to finalize the region
  ///
  /// \param OMPD Directive to generate exiting code for
  /// \param FinIP Insertion point for emitting Finalization code and exit call
  /// \param ExitCall Call to the ending OMP Runtime Function
  /// \param HasFinalize indicate if the directive will require finalization
  ///        and has a finalization callback in the stack that
  ///        should be called.
  ///
  /// \return The insertion position in exit block
  InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                        InsertPointTy FinIP,
                                        Instruction *ExitCall,
                                        bool HasFinalize = true);
2311 
  /// Common Interface to generate OMP inlined regions
  ///
  /// \param OMPD Directive to generate inlined region for
  /// \param EntryCall Call to the entry OMP Runtime Function
  /// \param ExitCall Call to the ending OMP Runtime Function
  /// \param BodyGenCB Body code generation callback.
  /// \param FiniCB Finalization Callback. Will be called when finalizing region
  /// \param Conditional indicate if the entry call result will be used
  ///        to evaluate a conditional of whether a thread will execute
  ///        body code or not.
  /// \param HasFinalize indicate if the directive will require finalization
  ///        and has a finalization callback in the stack that
  ///        should be called.
  /// \param IsCancellable if HasFinalize is set to true, indicate if the
  ///        directive should be cancellable.
  /// \return The insertion point after the region

  InsertPointTy
  EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                       Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                       FinalizeCallbackTy FiniCB, bool Conditional = false,
                       bool HasFinalize = true, bool IsCancellable = false);
2334 
  /// Get a name built from \p Parts using the given (platform-specific)
  /// separators.
  /// \param Parts different parts of the final name that needs separation
  /// \param FirstSeparator First separator used between the initial two
  ///        parts of the name.
  /// \param Separator separator used between all of the rest consecutive
  ///        parts of the name
  static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                           StringRef FirstSeparator,
                                           StringRef Separator);
2344 
  /// Returns corresponding lock object for the specified critical region
  /// name. If the lock object does not exist it is created, otherwise the
  /// reference to the existing copy is returned.
  ///
  /// \param CriticalName Name of the critical region.
  Value *getOMPCriticalRegionLock(StringRef CriticalName);
2351 
  /// Callback type for Atomic Expression update.
  /// Example:
  /// \code{.cpp}
  /// unsigned x = 0;
  /// #pragma omp atomic update
  /// x = Expr(x_old);  //Expr() is any legal operation
  /// \endcode
  ///
  /// \param XOld the value of the atomic memory address to use for update
  /// \param IRB reference to the IRBuilder to use
  ///
  /// \returns Value to update X to.
  using AtomicUpdateCallbackTy =
      const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2366 
2367 private:
  /// The OpenMP atomic operation kinds, as passed to
  /// checkAndEmitFlushAfterAtomic.
  enum AtomicKind { Read, Write, Update, Capture, Compare };
2369 
  /// Determine whether to emit flush or not
  ///
  /// \param Loc    The insert and source location description.
  /// \param AO     The required atomic ordering
  /// \param AK     The OpenMP atomic operation kind used.
  ///
  /// \returns Whether a flush was emitted or not.
  bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
                                    AtomicOrdering AO, AtomicKind AK);
2379 
  /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
  /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
  /// Only Scalar data types.
  ///
  /// \param AllocaIP     The insertion point to be used for alloca
  ///                     instructions.
  /// \param X            The target atomic pointer to be updated
  /// \param XElemTy      The element type of the atomic pointer.
  /// \param Expr         The value to update X with.
  /// \param AO           Atomic ordering of the generated atomic
  ///                     instructions.
  /// \param RMWOp        The binary operation used for update. If the
  ///                     operation is not supported by atomicRMW, or belongs
  ///                     to {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
  ///                     atomic will be generated.
  /// \param UpdateOp     Code generator for complex expressions that cannot be
  ///                     expressed through atomicrmw instruction.
  /// \param VolatileX    true if \a X is volatile.
  /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  ///                     update expression, false otherwise.
  ///                     (e.g. true for X = X BinOp Expr)
  ///
  /// \returns A pair of the old value of X before the update, and the value
  ///          used for the update.
  std::pair<Value *, Value *>
  emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                   AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                   AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                   bool IsXBinopExpr);
2409 
  /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
  ///
  /// \returns The instruction
  Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                                AtomicRMWInst::BinOp RMWOp);
2415 
2416 public:
  /// A struct to pack relevant information while generating atomic Ops.
  struct AtomicOpValue {
    /// The value operated on (presumably the pointer to the atomic location;
    /// TODO confirm against the users of this struct).
    Value *Var = nullptr;
    /// Element type associated with \c Var (presumably its pointee type; TODO
    /// confirm).
    Type *ElemTy = nullptr;
    /// Signedness flag; false by default.
    bool IsSigned = false;
    /// Volatility flag; false by default.
    bool IsVolatile = false;
  };
2424 
  /// Emit atomic Read for : V = X --- Only Scalar data types.
  ///
  /// \param Loc The insert and source location description.
  /// \param X   The target pointer to be atomically read
  /// \param V   Memory address where to store atomically read
  ///            value
  /// \param AO  Atomic ordering of the generated atomic
  ///            instructions.
  ///
  /// \return Insertion point after generated atomic read IR.
  InsertPointTy createAtomicRead(const LocationDescription &Loc,
                                 AtomicOpValue &X, AtomicOpValue &V,
                                 AtomicOrdering AO);
2438 
  /// Emit atomic write for : X = Expr --- Only Scalar data types.
  ///
  /// \param Loc  The insert and source location description.
  /// \param X    The target pointer to be atomically written to
  /// \param Expr The value to store.
  /// \param AO   Atomic ordering of the generated atomic
  ///             instructions.
  ///
  /// \return Insertion point after generated atomic Write IR.
  InsertPointTy createAtomicWrite(const LocationDescription &Loc,
                                  AtomicOpValue &X, Value *Expr,
                                  AtomicOrdering AO);
2451 
2452   /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2453   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2454   /// Only Scalar data types.
2455   ///
2456   /// \param Loc      The insert and source location description.
2457   /// \param AllocaIP The insertion point to be used for alloca instructions.
2458   /// \param X        The target atomic pointer to be updated
2459   /// \param Expr     The value to update X with.
2460   /// \param AO       Atomic ordering of the generated atomic instructions.
2461   /// \param RMWOp    The binary operation used for update. If operation
2462   ///                 is	not supported by atomicRMW, or belong to
2463   ///	                {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
2464   ///                 atomic will be generated.
2465   /// \param UpdateOp 	Code generator for complex expressions that cannot be
2466   ///                   expressed through atomicrmw instruction.
2467   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2468   ///                     update expression, false otherwise.
2469   ///	                    (e.g. true for X = X BinOp Expr)
2470   ///
2471   /// \return Insertion point after generated atomic update IR.
2472   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
2473                                    InsertPointTy AllocaIP, AtomicOpValue &X,
2474                                    Value *Expr, AtomicOrdering AO,
2475                                    AtomicRMWInst::BinOp RMWOp,
2476                                    AtomicUpdateCallbackTy &UpdateOp,
2477                                    bool IsXBinopExpr);
2478 
2479   /// Emit atomic update for constructs: --- Only Scalar data types
2480   /// V = X; X = X BinOp Expr ,
2481   /// X = X BinOp Expr; V = X,
2482   /// V = X; X = Expr BinOp X,
2483   /// X = Expr BinOp X; V = X,
2484   /// V = X; X = UpdateOp(X),
2485   /// X = UpdateOp(X); V = X,
2486   ///
2487   /// \param Loc        The insert and source location description.
2488   /// \param AllocaIP   The insertion point to be used for alloca instructions.
2489   /// \param X          The target atomic pointer to be updated
2490   /// \param V          Memory address where to store captured value
2491   /// \param Expr       The value to update X with.
2492   /// \param AO         Atomic ordering of the generated atomic instructions
2493   /// \param RMWOp      The binary operation used for update. If
2494   ///                   operation is not supported by atomicRMW, or belong to
2495   ///	                  {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
2496   ///                   atomic will be generated.
2497   /// \param UpdateOp   Code generator for complex expressions that cannot be
2498   ///                   expressed through atomicrmw instruction.
2499   /// \param UpdateExpr true if X is an in place update of the form
2500   ///                   X = X BinOp Expr or X = Expr BinOp X
2501   /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2502   ///                     update expression, false otherwise.
2503   ///                     (e.g. true for X = X BinOp Expr)
2504   /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2505   ///                        'v', not an updated one.
2506   ///
2507   /// \return Insertion point after generated atomic capture IR.
2508   InsertPointTy
2509   createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
2510                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2511                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2512                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2513                       bool IsPostfixUpdate, bool IsXBinopExpr);
2514 
2515   /// Emit atomic compare for constructs: --- Only scalar data types
2516   /// cond-expr-stmt:
2517   /// x = x ordop expr ? expr : x;
2518   /// x = expr ordop x ? expr : x;
2519   /// x = x == e ? d : x;
2520   /// x = e == x ? d : x; (this one is not in the spec)
2521   /// cond-update-stmt:
2522   /// if (x ordop expr) { x = expr; }
2523   /// if (expr ordop x) { x = expr; }
2524   /// if (x == e) { x = d; }
2525   /// if (e == x) { x = d; } (this one is not in the spec)
2526   /// conditional-update-capture-atomic:
2527   /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2528   /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2529   /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2530   ///                                         IsFailOnly=true)
2531   /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2532   /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2533   ///                                                IsFailOnly=true)
2534   ///
2535   /// \param Loc          The insert and source location description.
2536   /// \param X            The target atomic pointer to be updated.
2537   /// \param V            Memory address where to store captured value (for
2538   ///                     compare capture only).
2539   /// \param R            Memory address where to store comparison result
2540   ///                     (for compare capture with '==' only).
2541   /// \param E            The expected value ('e') for forms that use an
2542   ///                     equality comparison or an expression ('expr') for
2543   ///                     forms that use 'ordop' (logically an atomic maximum or
2544   ///                     minimum).
2545   /// \param D            The desired value for forms that use an equality
2546   ///                     comparison. If forms that use 'ordop', it should be
2547   ///                     \p nullptr.
2548   /// \param AO           Atomic ordering of the generated atomic instructions.
2549   /// \param Op           Atomic compare operation. It can only be ==, <, or >.
2550   /// \param IsXBinopExpr True if the conditional statement is in the form where
2551   ///                     x is on LHS. It only matters for < or >.
2552   /// \param IsPostfixUpdate  True if original value of 'x' must be stored in
2553   ///                         'v', not an updated one (for compare capture
2554   ///                         only).
2555   /// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
2556   ///                     only when the comparison fails. This is only valid for
2557   ///                     the case the comparison is '=='.
2558   ///
2559   /// \return Insertion point after generated atomic capture IR.
2560   InsertPointTy
2561   createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
2562                       AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
2563                       AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
2564                       bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2565   InsertPointTy createAtomicCompare(const LocationDescription &Loc,
2566                                     AtomicOpValue &X, AtomicOpValue &V,
2567                                     AtomicOpValue &R, Value *E, Value *D,
2568                                     AtomicOrdering AO,
2569                                     omp::OMPAtomicCompareOp Op,
2570                                     bool IsXBinopExpr, bool IsPostfixUpdate,
2571                                     bool IsFailOnly, AtomicOrdering Failure);
2572 
2573   /// Create the control flow structure of a canonical OpenMP loop.
2574   ///
2575   /// The emitted loop will be disconnected, i.e. no edge to the loop's
2576   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2577   /// IRBuilder location is not preserved.
2578   ///
2579   /// \param DL        DebugLoc used for the instructions in the skeleton.
2580   /// \param TripCount Value to be used for the trip count.
2581   /// \param F         Function in which to insert the BasicBlocks.
2582   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
2583   ///                         typically the body itself.
2584   /// \param PostInsertBefore Where to insert BBs that execute after the body.
2585   /// \param Name      Base name used to derive BB
2586   ///                  and instruction names.
2587   ///
2588   /// \returns The CanonicalLoopInfo that represents the emitted loop.
2589   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
2590                                         Function *F,
2591                                         BasicBlock *PreInsertBefore,
2592                                         BasicBlock *PostInsertBefore,
2593                                         const Twine &Name = {});
2594   /// OMP Offload Info Metadata name string
2595   const std::string ompOffloadInfoName = "omp_offload.info";
2596 
2597   /// Loads all the offload entries information from the host IR
2598   /// metadata. This function is only meant to be used with device code
2599   /// generation.
2600   ///
2601   /// \param M         Module to load Metadata info from. Module passed maybe
2602   /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module.
2603   void loadOffloadInfoMetadata(Module &M);
2604 
2605   /// Loads all the offload entries information from the host IR
2606   /// metadata read from the file passed in as the HostFilePath argument. This
2607   /// function is only meant to be used with device code generation.
2608   ///
2609   /// \param HostFilePath The path to the host IR file,
2610   /// used to load in offload metadata for the device, allowing host and device
2611   /// to maintain the same metadata mapping.
2612   void loadOffloadInfoMetadata(StringRef HostFilePath);
2613 
2614   /// Gets (if variable with the given name already exist) or creates
2615   /// internal global variable with the specified Name. The created variable has
2616   /// linkage CommonLinkage by default and is initialized by null value.
2617   /// \param Ty Type of the global variable. If it is exist already the type
2618   /// must be the same.
2619   /// \param Name Name of the variable.
2620   GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
2621                                               unsigned AddressSpace = 0);
2622 
2623   /// Create a global function to register OpenMP requires flags into the
2624   /// runtime, according to the `Config`.
2625   ///
2626   /// This function should be added to the list of constructors of the
2627   /// compilation unit in order to be called before other OpenMP runtime
2628   /// functions.
2629   ///
2630   /// \param Name  Name of the created function.
2631   Function *createRegisterRequires(StringRef Name);
2632 };
2633 
2634 /// Class to represented the control flow structure of an OpenMP canonical loop.
2635 ///
2636 /// The control-flow structure is standardized for easy consumption by
2637 /// directives associated with loops. For instance, the worksharing-loop
2638 /// construct may change this control flow such that each loop iteration is
2639 /// executed on only one thread. The constraints of a canonical loop in brief
2640 /// are:
2641 ///
2642 ///  * The number of loop iterations must have been computed before entering the
2643 ///    loop.
2644 ///
2645 ///  * Has an (unsigned) logical induction variable that starts at zero and
2646 ///    increments by one.
2647 ///
2648 ///  * The loop's CFG itself has no side-effects. The OpenMP specification
2649 ///    itself allows side-effects, but the order in which they happen, including
2650 ///    how often or whether at all, is unspecified. We expect that the frontend
2651 ///    will emit those side-effect instructions somewhere (e.g. before the loop)
2652 ///    such that the CanonicalLoopInfo itself can be side-effect free.
2653 ///
2654 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2655 /// execution of a loop body that satifies these constraints. It does NOT
2656 /// represent arbitrary SESE regions that happen to contain a loop. Do not use
2657 /// CanonicalLoopInfo for such purposes.
2658 ///
2659 /// The control flow can be described as follows:
2660 ///
2661 ///     Preheader
2662 ///        |
2663 ///  /-> Header
2664 ///  |     |
2665 ///  |    Cond---\
2666 ///  |     |     |
2667 ///  |    Body   |
2668 ///  |    | |    |
2669 ///  |   <...>   |
2670 ///  |    | |    |
2671 ///   \--Latch   |
2672 ///              |
2673 ///             Exit
2674 ///              |
2675 ///            After
2676 ///
2677 /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2678 /// including) and end at AfterIP (at the After's first instruction, excluding).
2679 /// That is, instructions in the Preheader and After blocks (except the
2680 /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2681 /// side-effects. Typically, the Preheader is used to compute the loop's trip
2682 /// count. The instructions from BodyIP (at the Body block's first instruction,
2683 /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2684 /// control and thus can have side-effects. The body block is the single entry
2685 /// point into the loop body, which may contain arbitrary control flow as long
2686 /// as all control paths eventually branch to the Latch block.
2687 ///
2688 /// TODO: Consider adding another standardized BasicBlock between Body CFG and
2689 /// Latch to guarantee that there is only a single edge to the latch. It would
2690 /// make loop transformations easier to not needing to consider multiple
2691 /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2692 /// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that
2693 /// executes after each body iteration.
2694 ///
2695 /// There must be no loop-carried dependencies through llvm::Values. This is
2696 /// equivalant to that the Latch has no PHINode and the Header's only PHINode is
2697 /// for the induction variable.
2698 ///
2699 /// All code in Header, Cond, Latch and Exit (plus the terminator of the
2700 /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2701 /// by assertOK(). They are expected to not be modified unless explicitly
2702 /// modifying the CanonicalLoopInfo through a methods that applies a OpenMP
2703 /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2704 /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2705 /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2706 /// anymore as its underlying control flow may not exist anymore.
2707 /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2708 /// may also return a new CanonicalLoopInfo that can be passed to other
2709 /// loop-associated construct implementing methods. These loop-transforming
2710 /// methods may either create a new CanonicalLoopInfo usually using
2711 /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2712 /// modify one of the input CanonicalLoopInfo and return it as representing the
2713 /// modified loop. What is done is an implementation detail of
2714 /// transformation-implementing method and callers should always assume that the
2715 /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2716 /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2717 /// created by createCanonicalLoop, such that transforming methods do not have
2718 /// to special case where the CanonicalLoopInfo originated from.
2719 ///
2720 /// Generally, methods consuming CanonicalLoopInfo do not need an
2721 /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2722 /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2723 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2724 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2725 /// any InsertPoint in the Preheader, After or Block can still be used after
2726 /// calling such a method.
2727 ///
2728 /// TODO: Provide mechanisms for exception handling and cancellation points.
2729 ///
2730 /// Defined outside OpenMPIRBuilder because nested classes cannot be
2731 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2732 class CanonicalLoopInfo {
2733   friend class OpenMPIRBuilder;
2734 
2735 private:
2736   BasicBlock *Header = nullptr;
2737   BasicBlock *Cond = nullptr;
2738   BasicBlock *Latch = nullptr;
2739   BasicBlock *Exit = nullptr;
2740 
2741   /// Add the control blocks of this loop to \p BBs.
2742   ///
2743   /// This does not include any block from the body, including the one returned
2744   /// by getBody().
2745   ///
2746   /// FIXME: This currently includes the Preheader and After blocks even though
2747   /// their content is (mostly) not under CanonicalLoopInfo's control.
2748   /// Re-evaluated whether this makes sense.
2749   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2750 
2751   /// Sets the number of loop iterations to the given value. This value must be
2752   /// valid in the condition block (i.e., defined in the preheader) and is
2753   /// interpreted as an unsigned integer.
2754   void setTripCount(Value *TripCount);
2755 
2756   /// Replace all uses of the canonical induction variable in the loop body with
2757   /// a new one.
2758   ///
2759   /// The intended use case is to update the induction variable for an updated
2760   /// iteration space such that it can stay normalized in the 0...tripcount-1
2761   /// range.
2762   ///
2763   /// The \p Updater is called with the (presumable updated) current normalized
2764   /// induction variable and is expected to return the value that uses of the
2765   /// pre-updated induction values should use instead, typically dependent on
2766   /// the new induction variable. This is a lambda (instead of e.g. just passing
2767   /// the new value) to be able to distinguish the uses of the pre-updated
2768   /// induction variable and uses of the induction varible to compute the
2769   /// updated induction variable value.
2770   void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2771 
2772 public:
2773   /// Returns whether this object currently represents the IR of a loop. If
2774   /// returning false, it may have been consumed by a loop transformation or not
2775   /// been intialized. Do not use in this case;
isValid()2776   bool isValid() const { return Header; }
2777 
2778   /// The preheader ensures that there is only a single edge entering the loop.
2779   /// Code that must be execute before any loop iteration can be emitted here,
2780   /// such as computing the loop trip count and begin lifetime markers. Code in
2781   /// the preheader is not considered part of the canonical loop.
2782   BasicBlock *getPreheader() const;
2783 
2784   /// The header is the entry for each iteration. In the canonical control flow,
2785   /// it only contains the PHINode for the induction variable.
getHeader()2786   BasicBlock *getHeader() const {
2787     assert(isValid() && "Requires a valid canonical loop");
2788     return Header;
2789   }
2790 
2791   /// The condition block computes whether there is another loop iteration. If
2792   /// yes, branches to the body; otherwise to the exit block.
getCond()2793   BasicBlock *getCond() const {
2794     assert(isValid() && "Requires a valid canonical loop");
2795     return Cond;
2796   }
2797 
2798   /// The body block is the single entry for a loop iteration and not controlled
2799   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2800   /// eventually branch to the \p Latch block.
getBody()2801   BasicBlock *getBody() const {
2802     assert(isValid() && "Requires a valid canonical loop");
2803     return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2804   }
2805 
2806   /// Reaching the latch indicates the end of the loop body code. In the
2807   /// canonical control flow, it only contains the increment of the induction
2808   /// variable.
getLatch()2809   BasicBlock *getLatch() const {
2810     assert(isValid() && "Requires a valid canonical loop");
2811     return Latch;
2812   }
2813 
2814   /// Reaching the exit indicates no more iterations are being executed.
getExit()2815   BasicBlock *getExit() const {
2816     assert(isValid() && "Requires a valid canonical loop");
2817     return Exit;
2818   }
2819 
2820   /// The after block is intended for clean-up code such as lifetime end
2821   /// markers. It is separate from the exit block to ensure, analogous to the
2822   /// preheader, it having just a single entry edge and being free from PHI
2823   /// nodes should there be multiple loop exits (such as from break
2824   /// statements/cancellations).
getAfter()2825   BasicBlock *getAfter() const {
2826     assert(isValid() && "Requires a valid canonical loop");
2827     return Exit->getSingleSuccessor();
2828   }
2829 
2830   /// Returns the llvm::Value containing the number of loop iterations. It must
2831   /// be valid in the preheader and always interpreted as an unsigned integer of
2832   /// any bit-width.
getTripCount()2833   Value *getTripCount() const {
2834     assert(isValid() && "Requires a valid canonical loop");
2835     Instruction *CmpI = &Cond->front();
2836     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2837     return CmpI->getOperand(1);
2838   }
2839 
2840   /// Returns the instruction representing the current logical induction
2841   /// variable. Always unsigned, always starting at 0 with an increment of one.
getIndVar()2842   Instruction *getIndVar() const {
2843     assert(isValid() && "Requires a valid canonical loop");
2844     Instruction *IndVarPHI = &Header->front();
2845     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2846     return IndVarPHI;
2847   }
2848 
2849   /// Return the type of the induction variable (and the trip count).
getIndVarType()2850   Type *getIndVarType() const {
2851     assert(isValid() && "Requires a valid canonical loop");
2852     return getIndVar()->getType();
2853   }
2854 
2855   /// Return the insertion point for user code before the loop.
getPreheaderIP()2856   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
2857     assert(isValid() && "Requires a valid canonical loop");
2858     BasicBlock *Preheader = getPreheader();
2859     return {Preheader, std::prev(Preheader->end())};
2860   };
2861 
2862   /// Return the insertion point for user code in the body.
getBodyIP()2863   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
2864     assert(isValid() && "Requires a valid canonical loop");
2865     BasicBlock *Body = getBody();
2866     return {Body, Body->begin()};
2867   };
2868 
2869   /// Return the insertion point for user code after the loop.
getAfterIP()2870   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
2871     assert(isValid() && "Requires a valid canonical loop");
2872     BasicBlock *After = getAfter();
2873     return {After, After->begin()};
2874   };
2875 
getFunction()2876   Function *getFunction() const {
2877     assert(isValid() && "Requires a valid canonical loop");
2878     return Header->getParent();
2879   }
2880 
2881   /// Consistency self-check.
2882   void assertOK() const;
2883 
2884   /// Invalidate this loop. That is, the underlying IR does not fulfill the
2885   /// requirements of an OpenMP canonical loop anymore.
2886   void invalidate();
2887 };
2888 
2889 } // end namespace llvm
2890 
2891 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
2892