1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
10 // way to create LLVM instructions for OpenMP directives.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16 
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include <forward_list>
#include <map>
#include <optional>
#include <tuple>
25 
26 namespace llvm {
27 class CanonicalLoopInfo;
28 struct TargetRegionEntryInfo;
29 class OffloadEntriesInfoManager;
30 class OpenMPIRBuilder;
31 
/// Move the instructions after an InsertPoint to the beginning of another
/// BasicBlock.
///
/// The instructions after \p IP are moved to the beginning of \p New which must
/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
/// \p New will be added such that there is no semantic change. Otherwise, the
/// \p IP insert block remains degenerate and it is up to the caller to insert a
/// terminator.
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
              bool CreateBranch);

/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
/// insert location will stick to after the instruction before the insertion
/// point (instead of moving with the instruction the InsertPoint stores
/// internally).
void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);

/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
/// (missing the terminator).
///
/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
/// is true, a branch to the new successor will be created such that
/// semantically there is no change; otherwise the block of the insertion point
/// remains degenerate and it is the caller's responsibility to insert a
/// terminator. Returns the new successor block.
BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator).  Its new insert location will stick to
/// after the instruction before the insertion point (instead of moving with the
/// instruction the InsertPoint stores internally).
BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
                    llvm::Twine Name = {});

/// Split a BasicBlock at \p Builder's insertion point, even if the block is
/// degenerate (missing the terminator).  Its new insert location will stick to
/// after the instruction before the insertion point (instead of moving with the
/// instruction the InsertPoint stores internally).
///
/// Unlike the IRBuilderBase overload, \p Name has no default here and must be
/// passed explicitly.
BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);

/// Like splitBB, but reuses the current block's name for the new name.
/// \p Suffix defaults to ".split".
BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
                              llvm::Twine Suffix = ".split");
77 
/// Captures attributes that affect generating LLVM-IR using the
/// OpenMPIRBuilder and related classes. Note that not all attributes are
/// required for all classes or functions. In some use cases the configuration
/// is not necessary at all, because the only functions that are called
/// are ones that are not dependent on the configuration.
83 class OpenMPIRBuilderConfig {
84 public:
85   /// Flag for specifying if the compilation is done for embedded device code
86   /// or host code.
87   std::optional<bool> IsTargetDevice;
88 
89   /// Flag for specifying if the compilation is done for an accelerator.
90   std::optional<bool> IsGPU;
91 
92   /// Flag for specifying weather a requires unified_shared_memory
93   /// directive is present or not.
94   std::optional<bool> HasRequiresUnifiedSharedMemory;
95 
96   // Flag for specifying if offloading is mandatory.
97   std::optional<bool> OpenMPOffloadMandatory;
98 
99   /// First separator used between the initial two parts of a name.
100   std::optional<StringRef> FirstSeparator;
101   /// Separator used between all of the rest consecutive parts of s name
102   std::optional<StringRef> Separator;
103 
104   OpenMPIRBuilderConfig() {}
105   OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
106                         bool HasRequiresUnifiedSharedMemory,
107                         bool OpenMPOffloadMandatory)
108       : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
109         HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory),
110         OpenMPOffloadMandatory(OpenMPOffloadMandatory) {}
111 
112   // Getters functions that assert if the required values are not present.
113   bool isTargetDevice() const {
114     assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
115     return *IsTargetDevice;
116   }
117 
118   bool isGPU() const {
119     assert(IsGPU.has_value() && "IsGPU is not set");
120     return *IsGPU;
121   }
122 
123   bool hasRequiresUnifiedSharedMemory() const {
124     assert(HasRequiresUnifiedSharedMemory.has_value() &&
125            "HasUnifiedSharedMemory is not set");
126     return *HasRequiresUnifiedSharedMemory;
127   }
128 
129   bool openMPOffloadMandatory() const {
130     assert(OpenMPOffloadMandatory.has_value() &&
131            "OpenMPOffloadMandatory is not set");
132     return *OpenMPOffloadMandatory;
133   }
134   // Returns the FirstSeparator if set, otherwise use the default separator
135   // depending on isGPU
136   StringRef firstSeparator() const {
137     if (FirstSeparator.has_value())
138       return *FirstSeparator;
139     if (isGPU())
140       return "_";
141     return ".";
142   }
143 
144   // Returns the Separator if set, otherwise use the default separator depending
145   // on isGPU
146   StringRef separator() const {
147     if (Separator.has_value())
148       return *Separator;
149     if (isGPU())
150       return "$";
151     return ".";
152   }
153 
154   void setIsTargetDevice(bool Value) { IsTargetDevice = Value; }
155   void setIsGPU(bool Value) { IsGPU = Value; }
156   void setHasRequiresUnifiedSharedMemory(bool Value) {
157     HasRequiresUnifiedSharedMemory = Value;
158   }
159   void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
160   void setSeparator(StringRef S) { Separator = S; }
161 };
162 
163 /// Data structure to contain the information needed to uniquely identify
164 /// a target entry.
165 struct TargetRegionEntryInfo {
166   std::string ParentName;
167   unsigned DeviceID;
168   unsigned FileID;
169   unsigned Line;
170   unsigned Count;
171 
172   TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {}
173   TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
174                         unsigned FileID, unsigned Line, unsigned Count = 0)
175       : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
176         Count(Count) {}
177 
178   static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
179                                          StringRef ParentName,
180                                          unsigned DeviceID, unsigned FileID,
181                                          unsigned Line, unsigned Count);
182 
183   bool operator<(const TargetRegionEntryInfo RHS) const {
184     return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
185            std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
186                            RHS.Count);
187   }
188 };
189 
190 /// Class that manages information about offload code regions and data
191 class OffloadEntriesInfoManager {
192   /// Number of entries registered so far.
193   OpenMPIRBuilder *OMPBuilder;
194   unsigned OffloadingEntriesNum = 0;
195 
196 public:
197   /// Base class of the entries info.
198   class OffloadEntryInfo {
199   public:
200     /// Kind of a given entry.
201     enum OffloadingEntryInfoKinds : unsigned {
202       /// Entry is a target region.
203       OffloadingEntryInfoTargetRegion = 0,
204       /// Entry is a declare target variable.
205       OffloadingEntryInfoDeviceGlobalVar = 1,
206       /// Invalid entry info.
207       OffloadingEntryInfoInvalid = ~0u
208     };
209 
210   protected:
211     OffloadEntryInfo() = delete;
212     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
213     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
214                               uint32_t Flags)
215         : Flags(Flags), Order(Order), Kind(Kind) {}
216     ~OffloadEntryInfo() = default;
217 
218   public:
219     bool isValid() const { return Order != ~0u; }
220     unsigned getOrder() const { return Order; }
221     OffloadingEntryInfoKinds getKind() const { return Kind; }
222     uint32_t getFlags() const { return Flags; }
223     void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
224     Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
225     void setAddress(Constant *V) {
226       assert(!Addr.pointsToAliveValue() && "Address has been set before!");
227       Addr = V;
228     }
229     static bool classof(const OffloadEntryInfo *Info) { return true; }
230 
231   private:
232     /// Address of the entity that has to be mapped for offloading.
233     WeakTrackingVH Addr;
234 
235     /// Flags associated with the device global.
236     uint32_t Flags = 0u;
237 
238     /// Order this entry was emitted.
239     unsigned Order = ~0u;
240 
241     OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
242   };
243 
244   /// Return true if a there are no entries defined.
245   bool empty() const;
246   /// Return number of entries defined so far.
247   unsigned size() const { return OffloadingEntriesNum; }
248 
249   OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
250 
251   //
252   // Target region entries related.
253   //
254 
255   /// Kind of the target registry entry.
256   enum OMPTargetRegionEntryKind : uint32_t {
257     /// Mark the entry as target region.
258     OMPTargetRegionEntryTargetRegion = 0x0,
259     /// Mark the entry as a global constructor.
260     OMPTargetRegionEntryCtor = 0x02,
261     /// Mark the entry as a global destructor.
262     OMPTargetRegionEntryDtor = 0x04,
263   };
264 
265   /// Target region entries info.
266   class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
267     /// Address that can be used as the ID of the entry.
268     Constant *ID = nullptr;
269 
270   public:
271     OffloadEntryInfoTargetRegion()
272         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
273     explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
274                                           Constant *ID,
275                                           OMPTargetRegionEntryKind Flags)
276         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
277           ID(ID) {
278       setAddress(Addr);
279     }
280 
281     Constant *getID() const { return ID; }
282     void setID(Constant *V) {
283       assert(!ID && "ID has been set before!");
284       ID = V;
285     }
286     static bool classof(const OffloadEntryInfo *Info) {
287       return Info->getKind() == OffloadingEntryInfoTargetRegion;
288     }
289   };
290 
291   /// Initialize target region entry.
292   /// This is ONLY needed for DEVICE compilation.
293   void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
294                                        unsigned Order);
295   /// Register target region entry.
296   void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
297                                      Constant *Addr, Constant *ID,
298                                      OMPTargetRegionEntryKind Flags);
299   /// Return true if a target region entry with the provided information
300   /// exists.
301   bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
302                                 bool IgnoreAddressId = false) const;
303 
304   // Return the Name based on \a EntryInfo using the next available Count.
305   void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
306                                   const TargetRegionEntryInfo &EntryInfo);
307 
308   /// brief Applies action \a Action on all registered entries.
309   typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
310                             const OffloadEntryInfoTargetRegion &)>
311       OffloadTargetRegionEntryInfoActTy;
312   void
313   actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
314 
315   //
316   // Device global variable entries related.
317   //
318 
319   /// Kind of the global variable entry..
320   enum OMPTargetGlobalVarEntryKind : uint32_t {
321     /// Mark the entry as a to declare target.
322     OMPTargetGlobalVarEntryTo = 0x0,
323     /// Mark the entry as a to declare target link.
324     OMPTargetGlobalVarEntryLink = 0x1,
325     /// Mark the entry as a declare target enter.
326     OMPTargetGlobalVarEntryEnter = 0x2,
327     /// Mark the entry as having no declare target entry kind.
328     OMPTargetGlobalVarEntryNone = 0x3,
329   };
330 
331   /// Kind of device clause for declare target variables
332   /// and functions
333   /// NOTE: Currently not used as a part of a variable entry
334   /// used for Flang and Clang to interface with the variable
335   /// related registration functions
336   enum OMPTargetDeviceClauseKind : uint32_t {
337     /// The target is marked for all devices
338     OMPTargetDeviceClauseAny = 0x0,
339     /// The target is marked for non-host devices
340     OMPTargetDeviceClauseNoHost = 0x1,
341     /// The target is marked for host devices
342     OMPTargetDeviceClauseHost = 0x2,
343     /// The target is marked as having no clause
344     OMPTargetDeviceClauseNone = 0x3
345   };
346 
347   /// Device global variable entries info.
348   class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
349     /// Type of the global variable.
350     int64_t VarSize;
351     GlobalValue::LinkageTypes Linkage;
352 
353   public:
354     OffloadEntryInfoDeviceGlobalVar()
355         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
356     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
357                                              OMPTargetGlobalVarEntryKind Flags)
358         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
359     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
360                                              int64_t VarSize,
361                                              OMPTargetGlobalVarEntryKind Flags,
362                                              GlobalValue::LinkageTypes Linkage)
363         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
364           VarSize(VarSize), Linkage(Linkage) {
365       setAddress(Addr);
366     }
367 
368     int64_t getVarSize() const { return VarSize; }
369     void setVarSize(int64_t Size) { VarSize = Size; }
370     GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
371     void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
372     static bool classof(const OffloadEntryInfo *Info) {
373       return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
374     }
375   };
376 
377   /// Initialize device global variable entry.
378   /// This is ONLY used for DEVICE compilation.
379   void initializeDeviceGlobalVarEntryInfo(StringRef Name,
380                                           OMPTargetGlobalVarEntryKind Flags,
381                                           unsigned Order);
382 
383   /// Register device global variable entry.
384   void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
385                                         int64_t VarSize,
386                                         OMPTargetGlobalVarEntryKind Flags,
387                                         GlobalValue::LinkageTypes Linkage);
388   /// Checks if the variable with the given name has been registered already.
389   bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
390     return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
391   }
392   /// Applies action \a Action on all registered entries.
393   typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
394       OffloadDeviceGlobalVarEntryInfoActTy;
395   void actOnDeviceGlobalVarEntriesInfo(
396       const OffloadDeviceGlobalVarEntryInfoActTy &Action);
397 
398 private:
399   /// Return the count of entries at a particular source location.
400   unsigned
401   getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
402 
403   /// Update the count of entries at a particular source location.
404   void
405   incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
406 
407   static TargetRegionEntryInfo
408   getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
409     return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
410                                  EntryInfo.FileID, EntryInfo.Line, 0);
411   }
412 
413   // Count of entries at a location.
414   std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
415 
416   // Storage for target region entries kind.
417   typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
418       OffloadEntriesTargetRegionTy;
419   OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
420   /// Storage for device global variable entries kind. The storage is to be
421   /// indexed by mangled name.
422   typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
423       OffloadEntriesDeviceGlobalVarTy;
424   OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
425 };
426 
427 /// An interface to create LLVM-IR for OpenMP directives.
428 ///
429 /// Each OpenMP directive has a corresponding public generator method.
430 class OpenMPIRBuilder {
431 public:
  /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
  /// not have an effect on \p M (see initialize).
  ///
  /// The internal IRBuilder is created on the context of \p M, and the offload
  /// info manager is handed a pointer to this builder.
  OpenMPIRBuilder(Module &M)
      : M(M), Builder(M.getContext()), OffloadInfoManager(this) {}
436   ~OpenMPIRBuilder();
437 
438   /// Initialize the internal state, this will put structures types and
439   /// potentially other helpers into the underlying module. Must be called
440   /// before any other method and only once! This internal state includes
441   /// Types used in the OpenMPIRBuilder generated from OMPKinds.def as well
442   /// as loading offload metadata for device from the OpenMP host IR file
443   /// passed in as the HostFilePath argument.
444   /// \param HostFilePath The path to the host IR file, used to load in
445   /// offload metadata for the device, allowing host and device to
446   /// maintain the same metadata mapping.
447   void initialize(StringRef HostFilePath = {});
448 
  /// Replace the builder's configuration with a copy of \p C.
  void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
450 
451   /// Finalize the underlying module, e.g., by outlining regions.
452   /// \param Fn                    The function to be finalized. If not used,
453   ///                              all functions are finalized.
454   void finalize(Function *Fn = nullptr);
455 
456   /// Add attributes known for \p FnID to \p Fn.
457   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
458 
459   /// Type used throughout for insertion points.
460   using InsertPointTy = IRBuilder<>::InsertPoint;
461 
  /// Create a name using the platform specific separators.
463   /// \param Parts parts of the final name that needs separation
464   /// The created name has a first separator between the first and second part
465   /// and a second separator between all other parts.
466   /// E.g. with FirstSeparator "$" and Separator "." and
467   /// parts: "p1", "p2", "p3", "p4"
468   /// The resulting name is "p1$p2.p3.p4"
469   /// The separators are retrieved from the OpenMPIRBuilderConfig.
470   std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
471 
472   /// Callback type for variable finalization (think destructors).
473   ///
474   /// \param CodeGenIP is the insertion point at which the finalization code
475   ///                  should be placed.
476   ///
477   /// A finalize callback knows about all objects that need finalization, e.g.
478   /// destruction, when the scope of the currently generated construct is left
479   /// at the time, and location, the callback is invoked.
480   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
481 
  /// Bundles a finalization callback with the directive it belongs to. This
  /// is the element type accepted by pushFinalizationCB.
  struct FinalizationInfo {
    /// The finalization callback provided by the last in-flight invocation of
    /// createXXXX for the directive of kind DK.
    FinalizeCallbackTy FiniCB;

    /// The directive kind of the innermost directive that has an associated
    /// region which might require finalization when it is left.
    omp::Directive DK;

    /// Flag to indicate if the directive is cancellable.
    bool IsCancellable;
  };
494 
  /// Push a finalization callback on the finalization stack.
  ///
  /// A copy of \p FI is appended to the stack; it stays there until the
  /// matching popFinalizationCB call.
  ///
  /// NOTE: Temporary solution until Clang CG is gone.
  void pushFinalizationCB(const FinalizationInfo &FI) {
    FinalizationStack.push_back(FI);
  }
501 
  /// Pop the last finalization callback from the finalization stack.
  ///
  /// No emptiness check is made here, so each call must be paired with an
  /// earlier pushFinalizationCB.
  ///
  /// NOTE: Temporary solution until Clang CG is gone.
  void popFinalizationCB() { FinalizationStack.pop_back(); }
506 
507   /// Callback type for body (=inner region) code generation
508   ///
509   /// The callback takes code locations as arguments, each describing a
510   /// location where additional instructions can be inserted.
511   ///
512   /// The CodeGenIP may be in the middle of a basic block or point to the end of
513   /// it. The basic block may have a terminator or be degenerate. The callback
514   /// function may just insert instructions at that position, but also split the
  /// block (without the Before argument of BasicBlock::splitBasicBlock such
  /// that the identity of the split predecessor block is preserved) and insert
517   /// additional control flow, including branches that do not lead back to what
518   /// follows the CodeGenIP. Note that since the callback is allowed to split
519   /// the block, callers must assume that InsertPoints to positions in the
520   /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
521   /// such InsertPoints need to be preserved, it can split the block itself
522   /// before calling the callback.
523   ///
524   /// AllocaIP and CodeGenIP must not point to the same position.
525   ///
526   /// \param AllocaIP is the insertion point at which new alloca instructions
527   ///                 should be placed. The BasicBlock it is pointing to must
528   ///                 not be split.
529   /// \param CodeGenIP is the insertion point at which the body code should be
530   ///                  placed.
531   using BodyGenCallbackTy =
532       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
533 
  // This is created primarily for the sections construct, as
  // llvm::function_ref (BodyGenCallbackTy) is not storable (as described in
  // the comments of the function_ref class - function_ref contains a
  // non-owning reference to the callable).
538   using StorableBodyGenCallbackTy =
539       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
540 
541   /// Callback type for loop body code generation.
542   ///
543   /// \param CodeGenIP is the insertion point where the loop's body code must be
544   ///                  placed. This will be a dedicated BasicBlock with a
545   ///                  conditional branch from the loop condition check and
546   ///                  terminated with an unconditional branch to the loop
547   ///                  latch.
548   /// \param IndVar    is the induction variable usable at the insertion point.
549   using LoopBodyGenCallbackTy =
550       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
551 
552   /// Callback type for variable privatization (think copy & default
553   /// constructor).
554   ///
555   /// \param AllocaIP is the insertion point at which new alloca instructions
556   ///                 should be placed.
557   /// \param CodeGenIP is the insertion point at which the privatization code
558   ///                  should be placed.
559   /// \param Original The value being copied/created, should not be used in the
560   ///                 generated IR.
561   /// \param Inner The equivalent of \p Original that should be used in the
562   ///              generated IR; this is equal to \p Original if the value is
563   ///              a pointer and can thus be passed directly, otherwise it is
564   ///              an equivalent but different value.
565   /// \param ReplVal The replacement value, thus a copy or new created version
566   ///                of \p Inner.
567   ///
568   /// \returns The new insertion point where code generation continues and
569   ///          \p ReplVal the replacement value.
570   using PrivatizeCallbackTy = function_ref<InsertPointTy(
571       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
572       Value &Inner, Value *&ReplVal)>;
573 
574   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
575   /// (filename, line, column, ...).
576   struct LocationDescription {
577     LocationDescription(const IRBuilderBase &IRB)
578         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
579     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
580     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
581         : IP(IP), DL(DL) {}
582     InsertPointTy IP;
583     DebugLoc DL;
584   };
585 
586   /// Emitter methods for OpenMP directives.
587   ///
588   ///{
589 
590   /// Generator for '#omp barrier'
591   ///
592   /// \param Loc The location where the barrier directive was encountered.
593   /// \param DK The kind of directive that caused the barrier.
594   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
595   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
596   ///                        should be checked and acted upon.
597   ///
598   /// \returns The insertion point after the barrier.
599   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
600                               bool ForceSimpleCall = false,
601                               bool CheckCancelFlag = true);
602 
603   /// Generator for '#omp cancel'
604   ///
605   /// \param Loc The location where the directive was encountered.
606   /// \param IfCondition The evaluated 'if' clause expression, if any.
  /// \param CanceledDirective The kind of directive that is canceled.
608   ///
609   /// \returns The insertion point after the barrier.
610   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
611                              omp::Directive CanceledDirective);
612 
613   /// Generator for '#omp parallel'
614   ///
615   /// \param Loc The insert and source location description.
616   /// \param AllocaIP The insertion points to be used for alloca instructions.
617   /// \param BodyGenCB Callback that will generate the region code.
618   /// \param PrivCB Callback to copy a given variable (think copy constructor).
619   /// \param FiniCB Callback to finalize variable copies.
620   /// \param IfCondition The evaluated 'if' clause expression, if any.
621   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
622   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
623   /// \param IsCancellable Flag to indicate a cancellable parallel region.
624   ///
625   /// \returns The insertion position *after* the parallel.
626   IRBuilder<>::InsertPoint
627   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
628                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
629                  FinalizeCallbackTy FiniCB, Value *IfCondition,
630                  Value *NumThreads, omp::ProcBindKind ProcBind,
631                  bool IsCancellable);
632 
633   /// Generator for the control flow structure of an OpenMP canonical loop.
634   ///
635   /// This generator operates on the logical iteration space of the loop, i.e.
636   /// the caller only has to provide a loop trip count of the loop as defined by
637   /// base language semantics. The trip count is interpreted as an unsigned
638   /// integer. The induction variable passed to \p BodyGenCB will be of the same
639   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
640   /// convert the logical iteration variable to the loop counter variable in the
641   /// loop body.
642   ///
643   /// \param Loc       The insert and source location description. The insert
644   ///                  location can be between two instructions or the end of a
645   ///                  degenerate block (e.g. a BB under construction).
646   /// \param BodyGenCB Callback that will generate the loop body code.
647   /// \param TripCount Number of iterations the loop body is executed.
648   /// \param Name      Base name used to derive BB and instruction names.
649   ///
650   /// \returns An object representing the created control flow structure which
651   ///          can be used for loop-associated directives.
652   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
653                                          LoopBodyGenCallbackTy BodyGenCB,
654                                          Value *TripCount,
655                                          const Twine &Name = "loop");
656 
657   /// Generator for the control flow structure of an OpenMP canonical loop.
658   ///
659   /// Instead of a logical iteration space, this allows specifying user-defined
660   /// loop counter values using increment, upper- and lower bounds. To
661   /// disambiguate the terminology when counting downwards, instead of lower
662   /// bounds we use \p Start for the loop counter value in the first body
663   /// iteration.
664   ///
665   /// Consider the following limitations:
666   ///
  ///  * A loop counter space over all integer values of its bit-width cannot be
  ///    represented (e.g., using uint8_t, the loop trip count of 256 cannot be
  ///    stored in an 8-bit integer):
670   ///
671   ///      DO I = 0, 255, 1
672   ///
673   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
674   ///    effectively counting downwards:
675   ///
676   ///      for (uint8_t i = 100u; i > 0; i += 127u)
677   ///
678   ///
679   /// TODO: May need to add additional parameters to represent:
680   ///
681   ///  * Allow representing downcounting with unsigned integers.
682   ///
683   ///  * Sign of the step and the comparison operator might disagree:
684   ///
685   ///      for (int i = 0; i < 42; i -= 1u)
686   ///
687   //
688   /// \param Loc       The insert and source location description.
689   /// \param BodyGenCB Callback that will generate the loop body code.
690   /// \param Start     Value of the loop counter for the first iterations.
691   /// \param Stop      Loop counter values past this will stop the loop.
692   /// \param Step      Loop counter increment after each iteration; negative
693   ///                  means counting down.
694   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
695   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
696   ///                      counter.
697   /// \param ComputeIP Insertion point for instructions computing the trip
698   ///                  count. Can be used to ensure the trip count is available
699   ///                  at the outermost loop of a loop nest. If not set,
700   ///                  defaults to the preheader of the generated loop.
701   /// \param Name      Base name used to derive BB and instruction names.
702   ///
703   /// \returns An object representing the created control flow structure which
704   ///          can be used for loop-associated directives.
705   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
706                                          LoopBodyGenCallbackTy BodyGenCB,
707                                          Value *Start, Value *Stop, Value *Step,
708                                          bool IsSigned, bool InclusiveStop,
709                                          InsertPointTy ComputeIP = {},
710                                          const Twine &Name = "loop");
711 
  /// Collapse a loop nest into a single loop.
  ///
  /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
  /// that has the same number of innermost loop iterations as the original
  /// loop nest. The induction variables of the input loops are derived from
  /// the collapsed loop's induction variable. This is intended to be used to
  /// implement OpenMP's collapse clause. Before applying a directive,
  /// collapseLoops normalizes a loop nest to contain only a single loop and the
  /// directive's implementation does not need to handle multiple loops itself.
  /// This does not remove all need for loop nest handling by directives, such
  /// as the ordered(<n>) clause or the simd schedule-clause modifier of the
  /// worksharing-loop directive.
  ///
  /// Example:
  /// \code
  ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
  ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
  ///       body(i, j);
  /// \endcode
  ///
  /// After collapsing with Loops={i,j}, the loop is changed to
  /// \code
  ///   for (int ij = 0; ij < 63; ++ij) {
  ///     int i = ij / 9;
  ///     int j = ij % 9;
  ///     body(i, j);
  ///   }
  /// \endcode
  ///
  /// In the current implementation, the following limitations apply:
  ///
  ///  * All input loops have an induction variable of the same type.
  ///
  ///  * The collapsed loop will have the same trip count integer type as the
  ///    input loops. Therefore it is possible that the collapsed loop cannot
  ///    represent all iterations of the input loops. For instance, assuming a
  ///    32 bit integer type, and two input loops both iterating 2^16 times, the
  ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
  ///    which cannot be represented in a 32-bit integer. Behavior is undefined
  ///    in this case.
  ///
  ///  * The trip counts of every input loop must be available at \p ComputeIP.
  ///    Non-rectangular loops are not yet supported.
  ///
  ///  * At each nest level, code between a surrounding loop and its nested loop
  ///    is hoisted into the loop body, and such code will be executed more
  ///    often than before collapsing (or not at all if any inner loop iteration
  ///    has a trip count of 0). This is permitted by the OpenMP specification.
  ///
  /// \param DL        Debug location for instructions added for collapsing,
  ///                  such as instructions to compute/derive the input loop's
  ///                  induction variables.
  /// \param Loops     Loops in the loop nest to collapse. Loops are specified
  ///                  from outermost-to-innermost and every control flow of a
  ///                  loop's body must pass through its directly nested loop.
  /// \param ComputeIP Where the additional instructions that compute the
  ///                  collapsed trip count are inserted. If not set, defaults
  ///                  to before the generated loop.
  ///
  /// \returns The CanonicalLoopInfo object representing the collapsed loop.
  CanonicalLoopInfo *collapseLoops(DebugLoc DL,
                                   ArrayRef<CanonicalLoopInfo *> Loops,
                                   InsertPointTy ComputeIP);
775 
  /// Get the default SIMD alignment value for the given target.
  ///
  /// \param TargetTriple   Target triple.
  /// \param Features       StringMap which describes extra CPU features.
  ///
  /// \returns The default alignment value for the target.
  ///          NOTE(review): the unit (bits vs. bytes) is not stated here --
  ///          confirm against the implementation.
  static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
                                            const StringMap<bool> &Features);
782 
  /// Retrieve (or create if non-existent) the address of a declare
  /// target variable, used in conjunction with registerTargetGlobalVariable
  /// to create declare target global variables.
  ///
  /// \param CaptureClause - enumerator corresponding to the OpenMP capture
  /// clause used in conjunction with the variable being registered (link,
  /// to, enter).
  /// \param DeviceClause - enumerator corresponding to the OpenMP device_type
  /// clause used in conjunction with the variable being registered (nohost,
  /// host, any).
  /// \param IsDeclaration - boolean stating if the variable being registered
  /// is a declaration only and not a definition.
  /// \param IsExternallyVisible - boolean stating if the variable is externally
  /// visible.
  /// \param EntryInfo - Unique entry information for the value generated
  /// using getTargetEntryUniqueInfo, used to name generated pointer references
  /// to the declare target variable.
  /// \param MangledName - the mangled name of the variable being registered.
  /// \param GeneratedRefs - references generated by invocations of
  /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar;
  /// these are required by Clang for bookkeeping.
  /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled.
  /// \param TargetTriple - The OpenMP device target triple(s) we are compiling
  /// for.
  /// \param LlvmPtrTy - The type of the variable we are generating or
  /// retrieving an address for.
  /// \param GlobalInitializer - a lambda function which creates a constant
  /// used for initializing a pointer reference to the variable in certain
  /// cases. If a nullptr is passed, it will default to utilising the original
  /// variable to initialize the pointer reference.
  /// \param VariableLinkage - a lambda function which returns the variable's
  /// linkage type. If unspecified and a nullptr is given, it will instead
  /// utilise the linkage stored on the existing global variable in the
  /// LLVMModule.
  Constant *getAddrOfDeclareTargetVar(
      OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
      OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
      bool IsDeclaration, bool IsExternallyVisible,
      TargetRegionEntryInfo EntryInfo, StringRef MangledName,
      std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
      std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
      std::function<Constant *()> GlobalInitializer,
      std::function<GlobalValue::LinkageTypes()> VariableLinkage);
826 
  /// Registers a target variable for device or host.
  ///
  /// \param CaptureClause - enumerator corresponding to the OpenMP capture
  /// clause used in conjunction with the variable being registered (link,
  /// to, enter).
  /// \param DeviceClause - enumerator corresponding to the OpenMP device_type
  /// clause used in conjunction with the variable being registered (nohost,
  /// host, any).
  /// \param IsDeclaration - boolean stating if the variable being registered
  /// is a declaration only and not a definition.
  /// \param IsExternallyVisible - boolean stating if the variable is externally
  /// visible.
  /// \param EntryInfo - Unique entry information for the value generated
  /// using getTargetEntryUniqueInfo, used to name generated pointer references
  /// to the declare target variable.
  /// \param MangledName - the mangled name of the variable being registered.
  /// \param GeneratedRefs - references generated by invocations of
  /// registerTargetGlobalVariable; these are required by Clang for
  /// bookkeeping.
  /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled.
  /// \param TargetTriple - The OpenMP device target triple(s) we are compiling
  /// for.
  /// \param GlobalInitializer - a lambda function which creates a constant
  /// used for initializing a pointer reference to the variable in certain
  /// cases. If a nullptr is passed, it will default to utilising the original
  /// variable to initialize the pointer reference.
  /// \param VariableLinkage - a lambda function which returns the variable's
  /// linkage type. If unspecified and a nullptr is given, it will instead
  /// utilise the linkage stored on the existing global variable in the
  /// LLVMModule.
  /// \param LlvmPtrTy - The type of the variable we are generating or
  /// retrieving an address for.
  /// \param Addr - the original llvm value (addr) of the variable to be
  /// registered.
  void registerTargetGlobalVariable(
      OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
      OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
      bool IsDeclaration, bool IsExternallyVisible,
      TargetRegionEntryInfo EntryInfo, StringRef MangledName,
      std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
      std::vector<Triple> TargetTriple,
      std::function<Constant *()> GlobalInitializer,
      std::function<GlobalValue::LinkageTypes()> VariableLinkage,
      Type *LlvmPtrTy, Constant *Addr);
871 
872 private:
  /// Modifies the canonical loop to be a statically-scheduled workshare loop.
  ///
  /// This takes a \p CLI representing a canonical loop, such as the one
  /// created by createCanonicalLoop, and emits additional instructions to
  /// turn it into a workshare loop. In particular, it calls to an OpenMP
  /// runtime function in the preheader to obtain the loop bounds to be used in
  /// the current thread, updates the relevant instructions in the canonical
  /// loop and calls to an OpenMP runtime finalization function after the loop.
  ///
  /// \param DL       Debug location for instructions added for the
  ///                 workshare-loop construct itself.
  /// \param CLI      A descriptor of the canonical loop to workshare.
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
  ///                 preheader of the loop.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  ///                     the loop.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                         InsertPointTy AllocaIP,
                                         bool NeedsBarrier);
894 
  /// Modifies the canonical loop to be a statically-scheduled workshare loop
  /// with a user-specified chunk size.
  ///
  /// \param DL           Debug location for instructions added for the
  ///                     workshare-loop construct itself.
  /// \param CLI          A descriptor of the canonical loop to workshare.
  /// \param AllocaIP     An insertion point for Alloca instructions usable in
  ///                     the preheader of the loop.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
  ///                     loop.
  /// \param ChunkSize    The user-specified chunk size.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
                                                CanonicalLoopInfo *CLI,
                                                InsertPointTy AllocaIP,
                                                bool NeedsBarrier,
                                                Value *ChunkSize);
913 
  /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
  ///
  /// This takes a \p CLI representing a canonical loop, such as the one
  /// created by createCanonicalLoop, and emits additional instructions to
  /// turn it into a workshare loop. In particular, it calls to an OpenMP
  /// runtime function in the preheader to obtain, and then in each iteration
  /// to update the loop counter.
  ///
  /// \param DL       Debug location for instructions added for the
  ///                 workshare-loop construct itself.
  /// \param CLI      A descriptor of the canonical loop to workshare.
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
  ///                 preheader of the loop.
  /// \param SchedType Type of scheduling to be passed to the init function.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  ///                     the loop.
  /// \param Chunk    The size of loop chunk considered as a unit when
  ///                 scheduling. If nullptr, defaults to 1.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                          InsertPointTy AllocaIP,
                                          omp::OMPScheduleType SchedType,
                                          bool NeedsBarrier,
                                          Value *Chunk = nullptr);
939 
  /// Create an alternative version of the loop to support the if clause.
  ///
  /// An OpenMP if clause can require generating a second loop. This loop
  /// will be executed when the if clause condition is not met. createIfVersion
  /// adds a branch instruction to the copied loop if \p IfCond is not met.
  ///
  /// \param Loop       Original loop which should be versioned.
  /// \param IfCond     Value which corresponds to the if clause condition.
  /// \param VMap       Value to value map to define the relation between
  ///                   original and copied loop values and loop blocks.
  /// \param NamePrefix Optional name prefix for if.then if.else blocks.
  void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
                       ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
953 
954 public:
  /// Modifies the canonical loop to be a workshare loop.
  ///
  /// This takes a \p CLI representing a canonical loop, such as the one
  /// created by createCanonicalLoop, and emits additional instructions to
  /// turn it into a workshare loop. In particular, it calls to an OpenMP
  /// runtime function in the preheader to obtain the loop bounds to be used in
  /// the current thread, updates the relevant instructions in the canonical
  /// loop and calls to an OpenMP runtime finalization function after the loop.
  ///
  /// The concrete transformation is done by applyStaticWorkshareLoop,
  /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
  /// on the value of \p SchedKind and \p ChunkSize.
  ///
  /// \param DL       Debug location for instructions added for the
  ///                 workshare-loop construct itself.
  /// \param CLI      A descriptor of the canonical loop to workshare.
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
  ///                 preheader of the loop.
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  ///                     the loop.
  /// \param SchedKind Scheduling algorithm to use.
  /// \param ChunkSize The chunk size for the inner loop.
  /// \param HasSimdModifier Whether the simd modifier is present in the
  ///                        schedule clause.
  /// \param HasMonotonicModifier Whether the monotonic modifier is present in
  ///                             the schedule clause.
  /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
  ///                                present in the schedule clause.
  /// \param HasOrderedClause Whether the (parameterless) ordered clause is
  ///                         present.
  ///
  /// \returns Point where to insert code after the workshare construct.
  InsertPointTy applyWorkshareLoop(
      DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
      bool NeedsBarrier,
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
      Value *ChunkSize = nullptr, bool HasSimdModifier = false,
      bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
      bool HasOrderedClause = false);
994 
  /// Tile a loop nest.
  ///
  /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
  /// \p Loops must be perfectly nested, from outermost to innermost loop
  /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
  /// of every loop and every tile size must be usable in the outermost
  /// loop's preheader. This implies that the loop nest is rectangular.
  ///
  /// Example:
  /// \code
  ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
  ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
  ///         body(i, j);
  /// \endcode
  ///
  /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
  /// \code
  ///   for (int i1 = 0; i1 < 3; ++i1)
  ///     for (int j1 = 0; j1 < 2; ++j1)
  ///       for (int i2 = 0; i2 < 5; ++i2)
  ///         for (int j2 = 0; j2 < 7; ++j2)
  ///           body(i1*5+i2, j1*7+j2);
  /// \endcode
  ///
  /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
  /// are referred to as the floor loops, and the loops i2 and j2 as the tile
  /// loops. Tiling also handles non-constant trip counts, non-constant tile
  /// sizes and trip counts that are not multiples of the tile size. In the
  /// latter case the tile loop of the last floor-loop iteration will have
  /// fewer iterations than specified as its tile size.
  ///
  ///
  /// \param DL        Debug location for instructions added by tiling, for
  ///                  instance the floor- and tile trip count computation.
  /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
  ///                  invalidated by this method, i.e. should not be used
  ///                  after tiling.
  /// \param TileSizes For each loop in \p Loops, the tile size for that
  ///                  dimension.
  ///
  /// \returns A list of generated loops. Contains twice as many loops as the
  ///          input loop nest; the first half are the floor loops and the
  ///          second half are the tile loops.
  std::vector<CanonicalLoopInfo *>
  tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
            ArrayRef<Value *> TileSizes);
1041 
  /// Fully unroll a loop.
  ///
  /// Instead of unrolling the loop immediately (and duplicating its body
  /// instructions), unrolling is deferred to LLVM's LoopUnrollPass by adding
  /// loop metadata.
  ///
  /// \param DL   Debug location for instructions added by unrolling.
  /// \param Loop The loop to unroll. The loop will be invalidated.
  void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
1051 
  /// Fully or partially unroll a loop.
  ///
  /// How the loop is unrolled is determined using LLVM's LoopUnrollPass.
  ///
  /// \param DL   Debug location for instructions added by unrolling.
  /// \param Loop The loop to unroll. The loop will be invalidated.
  void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
1058 
  /// Partially unroll a loop.
  ///
  /// The CanonicalLoopInfo of the unrolled loop for use with chained
  /// loop-associated directives can be requested using \p UnrolledCLI. Not
  /// needing the CanonicalLoopInfo allows more efficient code generation by
  /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
  /// A loop-associated directive applied to the unrolled loop needs to know the
  /// new trip count which means that if using a heuristically determined unroll
  /// factor (\p Factor == 0), that factor must be computed immediately. We are
  /// using the same logic as the LoopUnrollPass to derive the unroll factor,
  /// but that logic assumes that some canonicalization has taken place (e.g.
  /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
  /// better when the unrolled loop's CanonicalLoopInfo is not needed.
  ///
  /// \param DL          Debug location for instructions added by unrolling.
  /// \param Loop        The loop to unroll. The loop will be invalidated.
  /// \param Factor      The factor to unroll the loop by. A factor of 0
  ///                    indicates that a heuristic should be used to determine
  ///                    the unroll-factor.
  /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
  ///                    partially unrolled loop. Otherwise, uses loop metadata
  ///                    to defer unrolling to the LoopUnrollPass.
  void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
                         CanonicalLoopInfo **UnrolledCLI);
1083 
  /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
  /// is cloned. The metadata which prevents vectorization is added to
  /// the cloned loop. The cloned loop is executed when \p IfCond evaluates
  /// to false.
  ///
  /// \param Loop        The loop to simd-ize.
  /// \param AlignedVars The map which contains pairs of the pointer
  ///                    and its corresponding alignment.
  /// \param IfCond      The value which corresponds to the if clause
  ///                    condition.
  /// \param Order       The enum to map order clause.
  /// \param Simdlen     The Simdlen length to apply to the simd loop.
  /// \param Safelen     The Safelen length to apply to the simd loop.
  void applySimd(CanonicalLoopInfo *Loop,
                 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
                 omp::OrderKind Order, ConstantInt *Simdlen,
                 ConstantInt *Safelen);
1101 
  /// Generator for '#omp flush'.
  ///
  /// \param Loc The location where the flush directive was encountered.
  void createFlush(const LocationDescription &Loc);
1106 
  /// Generator for '#omp taskwait'.
  ///
  /// \param Loc The location where the taskwait directive was encountered.
  void createTaskwait(const LocationDescription &Loc);
1111 
  /// Generator for '#omp taskyield'.
  ///
  /// \param Loc The location where the taskyield directive was encountered.
  void createTaskyield(const LocationDescription &Loc);
1116 
1117   /// A struct to pack the relevant information for an OpenMP depend clause.
1118   struct DependData {
1119     omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
1120     Type *DepValueType;
1121     Value *DepVal;
1122     explicit DependData() = default;
1123     DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
1124                Value *DepVal)
1125         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
1126   };
1127 
  /// Generator for `#omp task`.
  ///
  /// \param Loc The location where the task construct was encountered.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param Tied True if the task is tied, false if the task is untied.
  /// \param Final i1 value which is `true` if the task is final, `false` if the
  ///              task is not final.
  /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
  ///                    task is generated, and the encountering thread must
  ///                    suspend the current task region, for which execution
  ///                    cannot be resumed until execution of the structured
  ///                    block that is associated with the generated task is
  ///                    completed.
  /// \param Dependencies DependData objects describing the depend clause
  ///                     information for the task; empty by default.
  InsertPointTy createTask(const LocationDescription &Loc,
                           InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
                           bool Tied = true, Value *Final = nullptr,
                           Value *IfCondition = nullptr,
                           SmallVector<DependData> Dependencies = {});
1147 
  /// Generator for the taskgroup construct.
  ///
  /// \param Loc The location where the taskgroup construct was encountered.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param BodyGenCB Callback that will generate the region code.
  InsertPointTy createTaskgroup(const LocationDescription &Loc,
                                InsertPointTy AllocaIP,
                                BodyGenCallbackTy BodyGenCB);
1156 
1157 
1158   using FileIdentifierInfoCallbackTy = std::function<std::tuple<std::string, uint64_t>()>;
1159 
  /// Creates a unique info for a target entry from the file name and line
  /// number supplied by \p CallBack.
  ///
  /// \param CallBack A callback function which should return the file name
  /// the entry resides in as well as the line number for the target entry.
  /// \param ParentName The name of the parent the target entry resides in, if
  /// any.
  static TargetRegionEntryInfo
  getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
                           StringRef ParentName = "");
1170 
  /// Functions used to generate reductions. Such functions take an insertion
  /// point, two Values representing LHS and RHS of the reduction,
  /// respectively, and a reference to the value that is updated to refer to
  /// the reduction result. They return an insertion point.
  /// NOTE(review): presumably the returned insertion point is where code
  /// generation continues after the reduction -- confirm against callers.
  using ReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
1176 
  /// Functions used to generate atomic reductions. Such functions take an
  /// insertion point, the element type of the reduction operands, and two
  /// Values representing pointers to LHS and RHS of the reduction. They are
  /// expected to atomically update the LHS to the reduced value, and they
  /// return an insertion point.
  /// NOTE(review): presumably the returned insertion point is where code
  /// generation continues after the reduction -- confirm against callers.
  using AtomicReductionGenTy =
      function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
1183 
1184   /// Information about an OpenMP reduction.
1185   struct ReductionInfo {
1186     ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
1187                   ReductionGenTy ReductionGen,
1188                   AtomicReductionGenTy AtomicReductionGen)
1189         : ElementType(ElementType), Variable(Variable),
1190           PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
1191           AtomicReductionGen(AtomicReductionGen) {}
1192 
1193     /// Reduction element type, must match pointee type of variable.
1194     Type *ElementType;
1195 
1196     /// Reduction variable of pointer type.
1197     Value *Variable;
1198 
1199     /// Thread-private partial reduction variable.
1200     Value *PrivateVariable;
1201 
1202     /// Callback for generating the reduction body. The IR produced by this will
1203     /// be used to combine two values in a thread-safe context, e.g., under
1204     /// lock or within the same thread, and therefore need not be atomic.
1205     ReductionGenTy ReductionGen;
1206 
1207     /// Callback for generating the atomic reduction body, may be null. The IR
1208     /// produced by this will be used to atomically combine two values during
1209     /// reduction. If null, the implementation will use the non-atomic version
1210     /// along with the appropriate synchronization mechanisms.
1211     AtomicReductionGenTy AtomicReductionGen;
1212   };
1213 
1214   // TODO: provide atomic and non-atomic reduction generators for reduction
1215   // operators defined by the OpenMP specification.
1216 
1217   /// Generator for '#omp reduction'.
1218   ///
1219   /// Emits the IR instructing the runtime to perform the specific kind of
1220   /// reductions. Expects reduction variables to have been privatized and
1221   /// initialized to reduction-neutral values separately. Emits the calls to
1222   /// runtime functions as well as the reduction function and the basic blocks
1223   /// performing the reduction atomically and non-atomically.
1224   ///
1225   /// The code emitted for the following:
1226   ///
1227   /// \code
1228   ///   type var_1;
1229   ///   type var_2;
1230   ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1231   ///   /* body */;
1232   /// \endcode
1233   ///
1234   /// corresponds to the following sketch.
1235   ///
1236   /// \code
1237   /// void _outlined_par() {
1238   ///   // N is the number of different reductions.
1239   ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1240   ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1241   ///                        _omp_reduction_func,
1242   ///                        _gomp_critical_user.reduction.var)) {
1243   ///   case 1: {
1244   ///     var_1 = var_1 <reduction-op> privatized_var_1;
1245   ///     var_2 = var_2 <reduction-op> privatized_var_2;
1246   ///     // ...
1247   ///    __kmpc_end_reduce(...);
1248   ///     break;
1249   ///   }
1250   ///   case 2: {
1251   ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
1252   ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
1253   ///     // ...
1254   ///     break;
1255   ///   }
1256   ///   default: break;
1257   ///   }
1258   /// }
1259   ///
1260   /// void _omp_reduction_func(void **lhs, void **rhs) {
1261   ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1262   ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1263   ///   // ...
1264   /// }
1265   /// \endcode
1266   ///
/// \param Loc                The location where the reduction was
///                           encountered. Must be within the associated
///                           directive and after the last local access to the
///                           reduction variables.
1271   /// \param AllocaIP           An insertion point suitable for allocas usable
1272   ///                           in reductions.
1273   /// \param ReductionInfos     A list of info on each reduction variable.
1274   /// \param IsNoWait           A flag set if the reduction is marked as nowait.
1275   InsertPointTy createReductions(const LocationDescription &Loc,
1276                                  InsertPointTy AllocaIP,
1277                                  ArrayRef<ReductionInfo> ReductionInfos,
1278                                  bool IsNoWait = false);
1279 
1280   ///}
1281 
1282   /// Return the insertion point used by the underlying IRBuilder.
1283   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
1284 
1285   /// Update the internal location to \p Loc.
1286   bool updateToLocation(const LocationDescription &Loc) {
1287     Builder.restoreIP(Loc.IP);
1288     Builder.SetCurrentDebugLocation(Loc.DL);
1289     return Loc.IP.getBlock() != nullptr;
1290   }
1291 
1292   /// Return the function declaration for the runtime function with \p FnID.
1293   FunctionCallee getOrCreateRuntimeFunction(Module &M,
1294                                             omp::RuntimeFunction FnID);
1295 
1296   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
1297 
1298   /// Return the (LLVM-IR) string describing the source location \p LocStr.
1299   Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1300 
1301   /// Return the (LLVM-IR) string describing the default source location.
1302   Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
1303 
1304   /// Return the (LLVM-IR) string describing the source location identified by
1305   /// the arguments.
1306   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
1307                                  unsigned Line, unsigned Column,
1308                                  uint32_t &SrcLocStrSize);
1309 
1310   /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
1311   /// fallback if \p DL does not specify the function name.
1312   Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
1313                                  Function *F = nullptr);
1314 
1315   /// Return the (LLVM-IR) string describing the source location \p Loc.
1316   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
1317                                  uint32_t &SrcLocStrSize);
1318 
1319   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
/// TODO: Create an enum class for the Reserve2Flags
1321   Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
1322                              omp::IdentFlag Flags = omp::IdentFlag(0),
1323                              unsigned Reserve2Flags = 0);
1324 
1325   /// Create a hidden global flag \p Name in the module with initial value \p
1326   /// Value.
1327   GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
1328 
1329   /// Create an offloading section struct used to register this global at
1330   /// runtime.
1331   ///
1332   /// Type struct __tgt_offload_entry{
1333   ///   void    *addr;      // Pointer to the offload entry info.
1334   ///                       // (function or global)
1335   ///   char    *name;      // Name of the function or global.
///   size_t  size;       // Size of the entry info (0 if it is a function).
1337   ///   int32_t flags;
1338   ///   int32_t reserved;
1339   /// };
1340   ///
1341   /// \param Addr The pointer to the global being registered.
1342   /// \param Name The symbol name associated with the global.
1343   /// \param Size The size in bytes of the global (0 for functions).
1344   /// \param Flags Flags associated with the entry.
1345   /// \param SectionName The section this entry will be placed at.
1346   void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
1347                            int32_t Flags,
1348                            StringRef SectionName = "omp_offloading_entries");
1349 
1350   /// Generate control flow and cleanup for cancellation.
1351   ///
1352   /// \param CancelFlag Flag indicating if the cancellation is performed.
/// \param CanceledDirective The kind of directive that is canceled.
1354   /// \param ExitCB Extra code to be generated in the exit block.
1355   void emitCancelationCheckImpl(Value *CancelFlag,
1356                                 omp::Directive CanceledDirective,
1357                                 FinalizeCallbackTy ExitCB = {});
1358 
1359   /// Generate a target region entry call.
1360   ///
1361   /// \param Loc The location at which the request originated and is fulfilled.
1362   /// \param AllocaIP The insertion point to be used for alloca instructions.
1363   /// \param Return Return value of the created function returned by reference.
1364   /// \param DeviceID Identifier for the device via the 'device' clause.
/// \param NumTeams Number of teams for the region via the 'num_teams' clause
1366   ///                 or 0 if unspecified and -1 if there is no 'teams' clause.
1367   /// \param NumThreads Number of threads via the 'thread_limit' clause.
1368   /// \param HostPtr Pointer to the host-side pointer of the target kernel.
1369   /// \param KernelArgs Array of arguments to the kernel.
1370   InsertPointTy emitTargetKernel(const LocationDescription &Loc,
1371                                  InsertPointTy AllocaIP, Value *&Return,
1372                                  Value *Ident, Value *DeviceID, Value *NumTeams,
1373                                  Value *NumThreads, Value *HostPtr,
1374                                  ArrayRef<Value *> KernelArgs);
1375 
1376   /// Generate a barrier runtime call.
1377   ///
1378   /// \param Loc The location at which the request originated and is fulfilled.
1379   /// \param DK The directive which caused the barrier
1380   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1381   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1382   ///                        should be checked and acted upon.
1383   ///
1384   /// \returns The insertion point after the barrier.
1385   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1386                                 omp::Directive DK, bool ForceSimpleCall,
1387                                 bool CheckCancelFlag);
1388 
1389   /// Generate a flush runtime call.
1390   ///
1391   /// \param Loc The location at which the request originated and is fulfilled.
1392   void emitFlush(const LocationDescription &Loc);
1393 
1394   /// The finalization stack made up of finalize callbacks currently in-flight,
1395   /// wrapped into FinalizationInfo objects that reference also the finalization
1396   /// target block and the kind of cancellable directive.
1397   SmallVector<FinalizationInfo, 8> FinalizationStack;
1398 
1399   /// Return true if the last entry in the finalization stack is of kind \p DK
1400   /// and cancellable.
1401   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1402     return !FinalizationStack.empty() &&
1403            FinalizationStack.back().IsCancellable &&
1404            FinalizationStack.back().DK == DK;
1405   }
1406 
1407   /// Generate a taskwait runtime call.
1408   ///
1409   /// \param Loc The location at which the request originated and is fulfilled.
1410   void emitTaskwaitImpl(const LocationDescription &Loc);
1411 
1412   /// Generate a taskyield runtime call.
1413   ///
1414   /// \param Loc The location at which the request originated and is fulfilled.
1415   void emitTaskyieldImpl(const LocationDescription &Loc);
1416 
1417   /// Return the current thread ID.
1418   ///
1419   /// \param Ident The ident (ident_t*) describing the query origin.
1420   Value *getOrCreateThreadID(Value *Ident);
1421 
1422   /// The OpenMPIRBuilder Configuration
1423   OpenMPIRBuilderConfig Config;
1424 
1425   /// The underlying LLVM-IR module
1426   Module &M;
1427 
1428   /// The LLVM-IR Builder used to create IR.
1429   IRBuilder<> Builder;
1430 
1431   /// Map to remember source location strings
1432   StringMap<Constant *> SrcLocStrMap;
1433 
1434   /// Map to remember existing ident_t*.
1435   DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
1436 
1437   /// Info manager to keep track of target regions.
1438   OffloadEntriesInfoManager OffloadInfoManager;
1439 
1440   /// Helper that contains information about regions we need to outline
1441   /// during finalization.
1442   struct OutlineInfo {
1443     using PostOutlineCBTy = std::function<void(Function &)>;
1444     PostOutlineCBTy PostOutlineCB;
1445     BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
1446     SmallVector<Value *, 2> ExcludeArgsFromAggregate;
1447 
1448     /// Collect all blocks in between EntryBB and ExitBB in both the given
1449     /// vector and set.
1450     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
1451                        SmallVectorImpl<BasicBlock *> &BlockVector);
1452 
1453     /// Return the function that contains the region to be outlined.
1454     Function *getFunction() const { return EntryBB->getParent(); }
1455   };
1456 
1457   /// Collection of regions that need to be outlined during finalization.
1458   SmallVector<OutlineInfo, 16> OutlineInfos;
1459 
1460   /// Collection of owned canonical loop objects that eventually need to be
1461   /// free'd.
1462   std::forward_list<CanonicalLoopInfo> LoopInfos;
1463 
1464   /// Add a new region that will be outlined later.
1465   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
1466 
1467   /// An ordered map of auto-generated variables to their unique names.
1468   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1469   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1470   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1471   /// variables.
1472   StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars;
1473 
1474   /// Computes the size of type in bytes.
1475   Value *getSizeInBytes(Value *BasePtr);
1476 
1477   // Emit a branch from the current block to the Target block only if
1478   // the current block has a terminator.
1479   void emitBranch(BasicBlock *Target);
1480 
1481   // If BB has no use then delete it and return. Else place BB after the current
1482   // block, if possible, or else at the end of the function. Also add a branch
1483   // from current block to BB if current block does not have a terminator.
1484   void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1485 
1486   /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
1487   /// Here is the logic:
1488   /// if (Cond) {
1489   ///   ThenGen();
1490   /// } else {
1491   ///   ElseGen();
1492   /// }
1493   void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
1494                     BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1495 
1496   /// Create the global variable holding the offload mappings information.
1497   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
1498                                         std::string VarName);
1499 
1500   /// Create the global variable holding the offload names information.
1501   GlobalVariable *
1502   createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
1503                         std::string VarName);
1504 
1505   struct MapperAllocas {
1506     AllocaInst *ArgsBase = nullptr;
1507     AllocaInst *Args = nullptr;
1508     AllocaInst *ArgSizes = nullptr;
1509   };
1510 
1511   /// Create the allocas instruction used in call to mapper functions.
1512   void createMapperAllocas(const LocationDescription &Loc,
1513                            InsertPointTy AllocaIP, unsigned NumOperands,
1514                            struct MapperAllocas &MapperAllocas);
1515 
1516   /// Create the call for the target mapper function.
1517   /// \param Loc The source location description.
1518   /// \param MapperFunc Function to be called.
1519   /// \param SrcLocInfo Source location information global.
1520   /// \param MaptypesArg The argument types.
1521   /// \param MapnamesArg The argument names.
1522   /// \param MapperAllocas The AllocaInst used for the call.
1523   /// \param DeviceID Device ID for the call.
1524   /// \param NumOperands Number of operands in the call.
1525   void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1526                       Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1527                       struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1528                       unsigned NumOperands);
1529 
1530   /// Container for the arguments used to pass data to the runtime library.
1531   struct TargetDataRTArgs {
1532     /// The array of base pointer passed to the runtime library.
1533     Value *BasePointersArray = nullptr;
1534     /// The array of section pointers passed to the runtime library.
1535     Value *PointersArray = nullptr;
1536     /// The array of sizes passed to the runtime library.
1537     Value *SizesArray = nullptr;
1538     /// The array of map types passed to the runtime library for the beginning
1539     /// of the region or for the entire region if there are no separate map
1540     /// types for the region end.
1541     Value *MapTypesArray = nullptr;
1542     /// The array of map types passed to the runtime library for the end of the
1543     /// region, or nullptr if there are no separate map types for the region
1544     /// end.
1545     Value *MapTypesArrayEnd = nullptr;
1546     /// The array of user-defined mappers passed to the runtime library.
1547     Value *MappersArray = nullptr;
1548     /// The array of original declaration names of mapped pointers sent to the
1549     /// runtime library for debugging
1550     Value *MapNamesArray = nullptr;
1551 
1552     explicit TargetDataRTArgs() {}
1553     explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray,
1554                               Value *SizesArray, Value *MapTypesArray,
1555                               Value *MapTypesArrayEnd, Value *MappersArray,
1556                               Value *MapNamesArray)
1557         : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
1558           SizesArray(SizesArray), MapTypesArray(MapTypesArray),
1559           MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray),
1560           MapNamesArray(MapNamesArray) {}
1561   };
1562 
1563   /// Data structure that contains the needed information to construct the
1564   /// kernel args vector.
1565   struct TargetKernelArgs {
1566     /// Number of arguments passed to the runtime library.
1567     unsigned NumTargetItems;
1568     /// Arguments passed to the runtime library
1569     TargetDataRTArgs RTArgs;
1570     /// The number of iterations
1571     Value *NumIterations;
1572     /// The number of teams.
1573     Value *NumTeams;
1574     /// The number of threads.
1575     Value *NumThreads;
1576     /// The size of the dynamic shared memory.
1577     Value *DynCGGroupMem;
1578     /// True if the kernel has 'no wait' clause.
1579     bool HasNoWait;
1580 
1581     /// Constructor for TargetKernelArgs
1582     TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs,
1583                      Value *NumIterations, Value *NumTeams, Value *NumThreads,
1584                      Value *DynCGGroupMem, bool HasNoWait)
1585         : NumTargetItems(NumTargetItems), RTArgs(RTArgs),
1586           NumIterations(NumIterations), NumTeams(NumTeams),
1587           NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem),
1588           HasNoWait(HasNoWait) {}
1589   };
1590 
1591   /// Create the kernel args vector used by emitTargetKernel. This function
1592   /// creates various constant values that are used in the resulting args
1593   /// vector.
1594   static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
1595                                   IRBuilderBase &Builder,
1596                                   SmallVector<Value *> &ArgsVector);
1597 
1598   /// Struct that keeps the information that should be kept throughout
1599   /// a 'target data' region.
1600   class TargetDataInfo {
1601     /// Set to true if device pointer information have to be obtained.
1602     bool RequiresDevicePointerInfo = false;
1603     /// Set to true if Clang emits separate runtime calls for the beginning and
1604     /// end of the region.  These calls might have separate map type arrays.
1605     bool SeparateBeginEndCalls = false;
1606 
1607   public:
1608     TargetDataRTArgs RTArgs;
1609 
1610     SmallMapVector<const Value *, std::pair<Value *, Value *>, 4>
1611         DevicePtrInfoMap;
1612 
1613     /// Indicate whether any user-defined mapper exists.
1614     bool HasMapper = false;
1615     /// The total number of pointers passed to the runtime library.
1616     unsigned NumberOfPtrs = 0u;
1617 
1618     explicit TargetDataInfo() {}
1619     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1620                             bool SeparateBeginEndCalls)
1621         : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1622           SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1623     /// Clear information about the data arrays.
1624     void clearArrayInfo() {
1625       RTArgs = TargetDataRTArgs();
1626       HasMapper = false;
1627       NumberOfPtrs = 0u;
1628     }
1629     /// Return true if the current target data information has valid arrays.
1630     bool isValid() {
1631       return RTArgs.BasePointersArray && RTArgs.PointersArray &&
1632              RTArgs.SizesArray && RTArgs.MapTypesArray &&
1633              (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
1634     }
1635     bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
1636     bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1637   };
1638 
/// Kinds of device information; element type of MapDeviceInfoArrayTy.
enum class DeviceInfoTy {
  None,
  Pointer,
  Address,
};
1640   using MapValuesArrayTy = SmallVector<Value *, 4>;
1641   using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>;
1642   using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>;
1643   using MapNamesArrayTy = SmallVector<Constant *, 4>;
1644   using MapDimArrayTy = SmallVector<uint64_t, 4>;
1645   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
1646 
1647   /// This structure contains combined information generated for mappable
1648   /// clauses, including base pointers, pointers, sizes, map types, user-defined
1649   /// mappers, and non-contiguous information.
1650   struct MapInfosTy {
1651     struct StructNonContiguousInfo {
1652       bool IsNonContiguous = false;
1653       MapDimArrayTy Dims;
1654       MapNonContiguousArrayTy Offsets;
1655       MapNonContiguousArrayTy Counts;
1656       MapNonContiguousArrayTy Strides;
1657     };
1658     MapValuesArrayTy BasePointers;
1659     MapValuesArrayTy Pointers;
1660     MapDeviceInfoArrayTy DevicePointers;
1661     MapValuesArrayTy Sizes;
1662     MapFlagsArrayTy Types;
1663     MapNamesArrayTy Names;
1664     StructNonContiguousInfo NonContigInfo;
1665 
1666     /// Append arrays in \a CurInfo.
1667     void append(MapInfosTy &CurInfo) {
1668       BasePointers.append(CurInfo.BasePointers.begin(),
1669                           CurInfo.BasePointers.end());
1670       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1671       DevicePointers.append(CurInfo.DevicePointers.begin(),
1672                             CurInfo.DevicePointers.end());
1673       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1674       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1675       Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1676       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
1677                                 CurInfo.NonContigInfo.Dims.end());
1678       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
1679                                    CurInfo.NonContigInfo.Offsets.end());
1680       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
1681                                   CurInfo.NonContigInfo.Counts.end());
1682       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
1683                                    CurInfo.NonContigInfo.Strides.end());
1684     }
1685   };
1686 
1687   /// Callback function type for functions emitting the host fallback code that
1688   /// is executed when the kernel launch fails. It takes an insertion point as
1689   /// parameter where the code should be emitted. It returns an insertion point
/// that points right after the emitted code.
1691   using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>;
1692 
1693   /// Generate a target region entry call and host fallback call.
1694   ///
1695   /// \param Loc The location at which the request originated and is fulfilled.
1696   /// \param OutlinedFn The outlined kernel function.
/// \param OutlinedFnID The outlined function ID.
1698   /// \param EmitTargetCallFallbackCB Call back function to generate host
1699   ///        fallback code.
1700   /// \param Args Data structure holding information about the kernel arguments.
1701   /// \param DeviceID Identifier for the device via the 'device' clause.
1702   /// \param RTLoc Source location identifier
1703   /// \param AllocaIP The insertion point to be used for alloca instructions.
1704   InsertPointTy emitKernelLaunch(
1705       const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
1706       EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1707       Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1708 
1709   /// Emit the arguments to be passed to the runtime library based on the
1710   /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
1711   /// ForEndCall, emit map types to be passed for the end of the region instead
1712   /// of the beginning.
1713   void emitOffloadingArraysArgument(IRBuilderBase &Builder,
1714                                     OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
1715                                     OpenMPIRBuilder::TargetDataInfo &Info,
1716                                     bool EmitDebug = false,
1717                                     bool ForEndCall = false);
1718 
1719   /// Emit an array of struct descriptors to be assigned to the offload args.
1720   void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
1721                                    InsertPointTy CodeGenIP,
1722                                    MapInfosTy &CombinedInfo,
1723                                    TargetDataInfo &Info);
1724 
1725   /// Emit the arrays used to pass the captures and map information to the
1726   /// offloading runtime library. If there is no map or capture information,
1727   /// return nullptr by reference.
1728   void emitOffloadingArrays(
1729       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
1730       TargetDataInfo &Info, bool IsNonContiguous = false,
1731       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
1732       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1733 
1734   /// Creates offloading entry for the provided entry ID \a ID, address \a
1735   /// Addr, size \a Size, and flags \a Flags.
1736   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
1737                           int32_t Flags, GlobalValue::LinkageTypes);
1738 
/// Error categories that can be reported while emitting the offload entries
/// and metadata.
enum EmitMetadataErrorKind {
  EMIT_MD_TARGET_REGION_ERROR = 0,
  EMIT_MD_DECLARE_TARGET_ERROR = 1,
  EMIT_MD_GLOBAL_VAR_LINK_ERROR = 2
};
1746 
1747   /// Callback function type
1748   using EmitMetadataErrorReportFunctionTy =
1749       std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1750 
1751   // Emit the offloading entries and metadata so that the device codegen side
1752   // can easily figure out what to emit. The produced metadata looks like
1753   // this:
1754   //
1755   // !omp_offload.info = !{!1, ...}
1756   //
// We only generate metadata for functions that contain target regions.
1758   void createOffloadEntriesAndInfoMetadata(
1759       EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1760 
1761 public:
1762   /// Generator for __kmpc_copyprivate
1763   ///
1764   /// \param Loc The source location description.
1765   /// \param BufSize Number of elements in the buffer.
1766   /// \param CpyBuf List of pointers to data to be copied.
1767   /// \param CpyFn function to call for copying data.
1768   /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1769   ///
1770   /// \return The insertion position *after* the CopyPrivate call.
1771 
1772   InsertPointTy createCopyPrivate(const LocationDescription &Loc,
1773                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
1774                                   llvm::Value *CpyFn, llvm::Value *DidIt);
1775 
1776   /// Generator for '#omp single'
1777   ///
1778   /// \param Loc The source location description.
1779   /// \param BodyGenCB Callback that will generate the region code.
1780   /// \param FiniCB Callback to finalize variable copies.
1781   /// \param IsNowait If false, a barrier is emitted.
1782   /// \param DidIt Local variable used as a flag to indicate 'single' thread
1783   ///
1784   /// \returns The insertion position *after* the single call.
1785   InsertPointTy createSingle(const LocationDescription &Loc,
1786                              BodyGenCallbackTy BodyGenCB,
1787                              FinalizeCallbackTy FiniCB, bool IsNowait,
1788                              llvm::Value *DidIt);
1789 
1790   /// Generator for '#omp master'
1791   ///
1792   /// \param Loc The insert and source location description.
1793   /// \param BodyGenCB Callback that will generate the region code.
1794   /// \param FiniCB Callback to finalize variable copies.
1795   ///
1796   /// \returns The insertion position *after* the master.
1797   InsertPointTy createMaster(const LocationDescription &Loc,
1798                              BodyGenCallbackTy BodyGenCB,
1799                              FinalizeCallbackTy FiniCB);
1800 
1801   /// Generator for '#omp masked'
1802   ///
1803   /// \param Loc The insert and source location description.
1804   /// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
1806   ///
1807   /// \returns The insertion position *after* the masked.
1808   InsertPointTy createMasked(const LocationDescription &Loc,
1809                              BodyGenCallbackTy BodyGenCB,
1810                              FinalizeCallbackTy FiniCB, Value *Filter);
1811 
1812   /// Generator for '#omp critical'
1813   ///
1814   /// \param Loc The insert and source location description.
1815   /// \param BodyGenCB Callback that will generate the region body code.
1816   /// \param FiniCB Callback to finalize variable copies.
1817   /// \param CriticalName name of the lock used by the critical directive
1818   /// \param HintInst Hint Instruction for hint clause associated with critical
1819   ///
1820   /// \returns The insertion position *after* the critical.
1821   InsertPointTy createCritical(const LocationDescription &Loc,
1822                                BodyGenCallbackTy BodyGenCB,
1823                                FinalizeCallbackTy FiniCB,
1824                                StringRef CriticalName, Value *HintInst);
1825 
1826   /// Generator for '#omp ordered depend (source | sink)'
1827   ///
1828   /// \param Loc The insert and source location description.
1829   /// \param AllocaIP The insertion point to be used for alloca instructions.
1830   /// \param NumLoops The number of loops in depend clause.
1831   /// \param StoreValues The value will be stored in vector address.
1832   /// \param Name The name of alloca instruction.
1833   /// \param IsDependSource If true, depend source; otherwise, depend sink.
1834   ///
1835   /// \return The insertion position *after* the ordered.
1836   InsertPointTy createOrderedDepend(const LocationDescription &Loc,
1837                                     InsertPointTy AllocaIP, unsigned NumLoops,
1838                                     ArrayRef<llvm::Value *> StoreValues,
1839                                     const Twine &Name, bool IsDependSource);
1840 
1841   /// Generator for '#omp ordered [threads | simd]'
1842   ///
1843   /// \param Loc The insert and source location description.
1844   /// \param BodyGenCB Callback that will generate the region code.
1845   /// \param FiniCB Callback to finalize variable copies.
1846   /// \param IsThreads If true, with threads clause or without clause;
1847   /// otherwise, with simd clause;
1848   ///
1849   /// \returns The insertion position *after* the ordered.
1850   InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
1851                                          BodyGenCallbackTy BodyGenCB,
1852                                          FinalizeCallbackTy FiniCB,
1853                                          bool IsThreads);
1854 
1855   /// Generator for '#omp sections'
1856   ///
1857   /// \param Loc The insert and source location description.
1858   /// \param AllocaIP The insertion points to be used for alloca instructions.
1859   /// \param SectionCBs Callbacks that will generate body of each section.
1860   /// \param PrivCB Callback to copy a given variable (think copy constructor).
1861   /// \param FiniCB Callback to finalize variable copies.
1862   /// \param IsCancellable Flag to indicate a cancellable parallel region.
1863   /// \param IsNowait If true, barrier - to ensure all sections are executed
1864   /// before moving forward will not be generated.
1865   /// \returns The insertion position *after* the sections.
1866   InsertPointTy createSections(const LocationDescription &Loc,
1867                                InsertPointTy AllocaIP,
1868                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
1869                                PrivatizeCallbackTy PrivCB,
1870                                FinalizeCallbackTy FiniCB, bool IsCancellable,
1871                                bool IsNowait);
1872 
1873   /// Generator for '#omp section'
1874   ///
1875   /// \param Loc The insert and source location description.
1876   /// \param BodyGenCB Callback that will generate the region body code.
1877   /// \param FiniCB Callback to finalize variable copies.
1878   /// \returns The insertion position *after* the section.
1879   InsertPointTy createSection(const LocationDescription &Loc,
1880                               BodyGenCallbackTy BodyGenCB,
1881                               FinalizeCallbackTy FiniCB);
1882 
1883   /// Generate conditional branch and relevant BasicBlocks through which private
1884   /// threads copy the 'copyin' variables from Master copy to threadprivate
1885   /// copies.
1886   ///
/// \param IP insertion block for copyin conditional
/// \param MasterAddr a pointer to the master variable
/// \param PrivateAddr a pointer to the threadprivate variable
/// \param IntPtrTy Pointer size type
/// \param BranchtoEnd Create a branch between the copyin.not.master blocks
///                    and copy.in.end block
1893   ///
1894   /// \returns The insertion point where copying operation to be emitted.
1895   InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
1896                                          Value *PrivateAddr,
1897                                          llvm::IntegerType *IntPtrTy,
1898                                          bool BranchtoEnd = true);
1899 
1900   /// Create a runtime call for kmpc_Alloc
1901   ///
1902   /// \param Loc The insert and source location description.
1903   /// \param Size Size of allocated memory space
1904   /// \param Allocator Allocator information instruction
1905   /// \param Name Name of call Instruction for OMP_Alloc
1906   ///
1907   /// \returns CallInst to the OMP_Alloc call
1908   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
1909                            Value *Allocator, std::string Name = "");
1910 
1911   /// Create a runtime call for kmpc_free
1912   ///
1913   /// \param Loc The insert and source location description.
1914   /// \param Addr Address of the memory space to be freed
1915   /// \param Allocator Allocator information instruction
1916   /// \param Name Name of call Instruction for OMP_Free
1917   ///
1918   /// \returns CallInst to the OMP_Free call
1919   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
1920                           Value *Allocator, std::string Name = "");
1921 
1922   /// Create a runtime call for kmpc_threadprivate_cached
1923   ///
1924   /// \param Loc The insert and source location description.
1925   /// \param Pointer pointer to data to be cached
1926   /// \param Size size of data to be cached
1927   /// \param Name Name of the generated call instruction
1928   ///
1929   /// \returns CallInst to the thread private cache call.
1930   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
1931                                       llvm::Value *Pointer,
1932                                       llvm::ConstantInt *Size,
1933                                       const llvm::Twine &Name = Twine(""));
1934 
1935   /// Create a runtime call for __tgt_interop_init
1936   ///
1937   /// \param Loc The insert and source location description.
1938   /// \param InteropVar variable to be allocated
1939   /// \param InteropType type of interop operation
1940   /// \param Device device to which offloading will occur
1941   /// \param NumDependences number of dependence variables
1942   /// \param DependenceAddress pointer to dependence variables
1943   /// \param HaveNowaitClause whether a nowait clause is present
1944   ///
1945   /// \returns CallInst to the __tgt_interop_init call
1946   CallInst *createOMPInteropInit(const LocationDescription &Loc,
1947                                  Value *InteropVar,
1948                                  omp::OMPInteropType InteropType, Value *Device,
1949                                  Value *NumDependences,
1950                                  Value *DependenceAddress,
1951                                  bool HaveNowaitClause);
1952 
1953   /// Create a runtime call for __tgt_interop_destroy
1954   ///
1955   /// \param Loc The insert and source location description.
1956   /// \param InteropVar the interop variable
1957   /// \param Device device to which offloading will occur
1958   /// \param NumDependences number of dependence variables
1959   /// \param DependenceAddress pointer to dependence variables
1960   /// \param HaveNowaitClause whether a nowait clause is present
1961   ///
1962   /// \returns CallInst to the __tgt_interop_destroy call
1963   CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
1964                                     Value *InteropVar, Value *Device,
1965                                     Value *NumDependences,
1966                                     Value *DependenceAddress,
1967                                     bool HaveNowaitClause);
1968 
1969   /// Create a runtime call for __tgt_interop_use
1970   ///
1971   /// \param Loc The insert and source location description.
1972   /// \param InteropVar the interop variable
1973   /// \param Device device to which offloading will occur
1974   /// \param NumDependences number of dependence variables
1975   /// \param DependenceAddress pointer to dependence variables
1976   /// \param HaveNowaitClause whether a nowait clause is present
1977   ///
1978   /// \returns CallInst to the __tgt_interop_use call
1979   CallInst *createOMPInteropUse(const LocationDescription &Loc,
1980                                 Value *InteropVar, Value *Device,
1981                                 Value *NumDependences, Value *DependenceAddress,
1982                                 bool HaveNowaitClause);
1983 
1984   /// The `omp target` interface
1985   ///
1986   /// For more information about the usage of this interface,
1987   /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
1988   ///
1989   ///{
1990 
1991   /// Create a runtime call for kmpc_target_init
1992   ///
1993   /// \param Loc The insert and source location description.
1994   /// \param IsSPMD Flag to indicate whether the kernel is an SPMD kernel.
1995   InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
1996 
1997   /// Create a runtime call for kmpc_target_deinit
1998   ///
1999   /// \param Loc The insert and source location description.
2000   /// \param IsSPMD Flag to indicate whether the kernel is an SPMD kernel.
2001   void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
2002 
2003   ///}
2004 
2005 private:
2006   /// Sets the function attributes expected for the outlined function.
2007   void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
2008                                                  int32_t NumTeams,
2009                                                  int32_t NumThreads);
2010 
2011   /// Creates the function ID/Address for the given outlined function.
2012   /// In the case of an embedded device function the address of the function is
2013   /// used; in the case of a non-offload function a constant is created.
2014   Constant *createOutlinedFunctionID(Function *OutlinedFn,
2015                                      StringRef EntryFnIDName);
2016 
2017   /// Creates the region entry address for the outlined function.
2018   Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
2019                                         StringRef EntryFnName);
2020 
2021 public:
2022   /// Callback type used to generate a function with the given name.
2023   using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2024 
2025   /// Create a unique name for the entry function using the source location
2026   /// information of the current target region. The name will be something like:
2027   ///
2028   /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
2029   ///
2030   /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
2031   /// mangled name of the function that encloses the target region and BB is the
2032   /// line number of the target region. CC is a count added when more than one
2033   /// region is located at the same location.
2034   ///
2035   /// If this target outline function is not an offload entry, we don't need to
2036   /// register it. This may happen if it is guarded by an if clause that is
2037   /// false at compile time, or no target archs have been specified.
2038   ///
2039   /// The created target region ID is used by the runtime library to identify
2040   /// the current target region, so it only has to be unique and not
2041   /// necessarily point to anything. It could be the pointer to the outlined
2042   /// function that implements the target region, but we aren't using that so
2043   /// that the compiler doesn't need to keep that, and could therefore inline
2044   /// the host function if proven worthwhile during optimization. On the other
2045   /// hand, if emitting code for the device, the ID has to be the function
2046   /// address so that it can be retrieved from the offloading entry and launched
2047   /// by the runtime library. We also mark the outlined function to have
2048   /// external linkage in case we are emitting code for the device, because
2049   /// these functions will be entry points to the device.
2050   ///
2051   /// \param EntryInfo The entry information about the function
2052   /// \param GenerateFunctionCallback The callback function to generate the code
2053   /// \param NumTeams Default number of teams
2054   /// \param NumThreads Default number of threads
2055   /// \param IsOffloadEntry Whether the outlined function is an offload entry
2056   /// \param OutlinedFn Pointer to the outlined function (passed by reference)
2057   /// \param OutlinedFnID The target region ID (passed by reference)
2058   void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
2059                                 FunctionGenCallback &GenerateFunctionCallback,
2060                                 int32_t NumTeams, int32_t NumThreads,
2061                                 bool IsOffloadEntry, Function *&OutlinedFn,
2062                                 Constant *&OutlinedFnID);
2063 
2064   /// Registers the given function and sets up the attributes of the
2065   /// function.
2066   ///
2067   /// \param EntryInfo The entry information about the function
2068   /// \param OutlinedFunction Pointer to the outlined function
2069   /// \param EntryFnName Name of the outlined function
2070   /// \param EntryFnIDName Name of the ID to be created
2071   /// \param NumTeams Default number of teams
2072   /// \param NumThreads Default number of threads
2073   /// \returns The FunctionID.
2074   Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
2075                                          Function *OutlinedFunction,
2076                                          StringRef EntryFnName,
2077                                          StringRef EntryFnIDName,
2078                                          int32_t NumTeams, int32_t NumThreads);
2079   /// Type of BodyGen to use for region codegen.
2080   ///
2081   /// - Priv: If device pointer privatization is required, emit the body of the
2082   ///   region here. It will have to be duplicated: with and without
2083   ///   privatization.
2084   /// - DupNoPriv: If we need device pointer privatization, we need
2085   ///   to emit the body of the region with no privatization in the 'else'
2086   ///   branch of the conditional.
2087   /// - NoPriv: If we don't require privatization of device
2088   ///   pointers, we emit the body in between the runtime calls. This avoids
2089   ///   duplicating the body code.
2090   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
2091 
2092   /// Generator for '#omp target data'
2093   ///
2094   /// \param Loc The location where the target data construct was encountered.
2095   /// \param AllocaIP The insertion point to be used for alloca instructions.
2096   /// \param CodeGenIP The insertion point at which the target directive code
2097   /// should be placed.
2098   /// \param DeviceID Stores the DeviceID from the device clause.
2099   /// \param IfCond Value which corresponds to the if clause condition.
2100   /// \param Info Stores all information related to the Target Data directive.
2101   /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2102   /// \param MapperFunc Optional; presumably the mapper runtime call to emit.
2103   /// \param BodyGenCB Optional Callback to generate the region code.
2104   /// \param DeviceAddrCB Optional callback to generate code related to
2105   /// use_device_ptr and use_device_addr.
2106   /// \param CustomMapperCB Optional callback to generate code related to
2107   /// custom mappers.
2108   /// \param SrcLocInfo Optional; presumably the source location info to use.
2109   OpenMPIRBuilder::InsertPointTy createTargetData(
2110       const LocationDescription &Loc, InsertPointTy AllocaIP,
2111       InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
2112       TargetDataInfo &Info,
2113       function_ref<MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCB,
2114       omp::RuntimeFunction *MapperFunc = nullptr,
2115       function_ref<InsertPointTy(InsertPointTy CodeGenIP,
2116                                  BodyGenTy BodyGenType)>
2117           BodyGenCB = nullptr,
2118       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
2119       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
2120       Value *SrcLocInfo = nullptr);
2121 
2122   using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
2123       InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; // See createTarget.
2124 
2125   /// Generator for '#omp target'
2126   ///
2127   /// \param Loc The location where the target construct was encountered.
2128   /// \param CodeGenIP The insertion point where the call to the outlined
2129   /// function should be emitted.
2130   /// \param EntryInfo The entry information about the function.
2131   /// \param NumTeams Number of teams specified in the num_teams clause.
2132   /// \param NumThreads Number of threads specified in the thread_limit clause.
2133   /// \param Inputs The input values to the region that will be passed
2134   /// as arguments to the outlined function.
2135   /// \param BodyGenCB Callback that will generate the region code.
2136   InsertPointTy createTarget(const LocationDescription &Loc,
2137                              OpenMPIRBuilder::InsertPointTy CodeGenIP,
2138                              TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
2139                              int32_t NumThreads,
2140                              SmallVectorImpl<Value *> &Inputs,
2141                              TargetBodyGenCallbackTy BodyGenCB);
2142 
2143   /// Declarations for LLVM-IR types (simple, array, function and structure) are
2144   /// generated below. Their names are defined and used in OMPKinds.def. Here
2145   /// we provide the declarations, the initializeTypes function will provide the
2146   /// values.
2147   ///
2148   ///{
2149 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
2150 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
2151   ArrayType *VarName##Ty = nullptr;                                            \
2152   PointerType *VarName##PtrTy = nullptr;
2153 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
2154   FunctionType *VarName = nullptr;                                             \
2155   PointerType *VarName##Ptr = nullptr;
2156 #define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
2157   StructType *VarName = nullptr;                                               \
2158   PointerType *VarName##Ptr = nullptr;
2159 #include "llvm/Frontend/OpenMP/OMPKinds.def"
2160 
2161   ///}
2162 
2163 private:
2164   /// Create all simple and struct types exposed by the runtime and remember
2165   /// their llvm::PointerTypes for easy access later.
2166   void initializeTypes(Module &M);
2167 
2168   /// Common interface for generating entry calls for OMP Directives.
2169   /// If the directive has a region/body, it will set the insertion
2170   /// point to the body.
2171   ///
2172   /// \param OMPD Directive to generate entry blocks for
2173   /// \param EntryCall Call to the entry OMP Runtime Function
2174   /// \param ExitBB block where the region ends.
2175   /// \param Conditional indicate if the entry call result will be used
2176   ///        to evaluate a conditional of whether a thread will execute
2177   ///        body code or not.
2178   ///
2179   /// \return The insertion position in exit block
2180   InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
2181                                          BasicBlock *ExitBB,
2182                                          bool Conditional = false);
2183 
2184   /// Common interface to finalize the region
2185   ///
2186   /// \param OMPD Directive to generate exiting code for
2187   /// \param FinIP Insertion point for emitting Finalization code and exit call
2188   /// \param ExitCall Call to the ending OMP Runtime Function
2189   /// \param HasFinalize indicate if the directive will require finalization
2190   ///        and has a finalization callback in the stack that
2191   ///        should be called.
2192   ///
2193   /// \return The insertion position in exit block
2194   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
2195                                         InsertPointTy FinIP,
2196                                         Instruction *ExitCall,
2197                                         bool HasFinalize = true);
2198 
2199   /// Common Interface to generate OMP inlined regions
2200   ///
2201   /// \param OMPD Directive to generate inlined region for
2202   /// \param EntryCall Call to the entry OMP Runtime Function
2203   /// \param ExitCall Call to the ending OMP Runtime Function
2204   /// \param BodyGenCB Body code generation callback.
2205   /// \param FiniCB Finalization Callback. Will be called when finalizing region
2206   /// \param Conditional indicate if the entry call result will be used
2207   ///        to evaluate a conditional of whether a thread will execute
2208   ///        body code or not.
2209   /// \param HasFinalize indicate if the directive will require finalization
2210   ///        and has a finalization callback in the stack that
2211   ///        should be called.
2212   /// \param IsCancellable if HasFinalize is set to true, indicate if
2213   ///        the directive should be cancellable.
2214   /// \return The insertion point after the region
2215 
2216   InsertPointTy
2217   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
2218                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
2219                        FinalizeCallbackTy FiniCB, bool Conditional = false,
2220                        bool HasFinalize = true, bool IsCancellable = false);
2221 
2222   /// Get the final name built from \p Parts joined by the given separators.
2223   /// \param Parts different parts of the final name that needs separation
2224   /// \param FirstSeparator First separator used between the initial two
2225   ///        parts of the name.
2226   /// \param Separator separator used between all of the rest consecutive
2227   ///        parts of the name
2228   static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
2229                                            StringRef FirstSeparator,
2230                                            StringRef Separator);
2231 
2232   /// Returns the corresponding lock object for the specified critical region
2233   /// name. If the lock object does not exist it is created, otherwise the
2234   /// reference to the existing copy is returned.
2235   /// \param CriticalName Name of the critical region.
2236   ///
2237   Value *getOMPCriticalRegionLock(StringRef CriticalName);
2238 
2239   /// Callback type for Atomic Expression update
2240   /// Example:
2241   /// \code{.cpp}
2242   /// unsigned x = 0;
2243   /// #pragma omp atomic update
2244   /// x = Expr(x_old);  // Expr() is any legal operation
2245   /// \endcode
2246   ///
2247   /// \param XOld the value of the atomic memory address to use for update
2248   /// \param IRB reference to the IRBuilder to use
2249   ///
2250   /// \returns Value to update X to.
2251   using AtomicUpdateCallbackTy =
2252       const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2253 
2254 private:
2255   enum AtomicKind { Read, Write, Update, Capture, Compare }; // atomic op kinds
2256 
2257   /// Determine whether to emit flush or not
2258   ///
2259   /// \param Loc    The insert and source location description.
2260   /// \param AO     The required atomic ordering
2261   /// \param AK     The OpenMP atomic operation kind used.
2262   ///
2263   /// \returns Whether a flush was emitted or not.
2264   bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2265                                     AtomicOrdering AO, AtomicKind AK);
2266 
2267   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
2268   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2269   /// Only Scalar data types.
2270   ///
2271   /// \param AllocaIP     The insertion point to be used for alloca
2272   ///                     instructions.
2273   /// \param X            The target atomic pointer to be updated
2274   /// \param XElemTy      The element type of the atomic pointer.
2275   /// \param Expr         The value to update X with.
2276   /// \param AO           Atomic ordering of the generated atomic
2277   ///                     instructions.
2278   /// \param RMWOp        The binary operation used for update. If the
2279   ///                     operation is not supported by atomicRMW,
2280   ///                     or belongs to {FADD, FSUB, BAD_BINOP},
2281   ///                     then a `cmpExch` based atomic will be generated.
2282   /// \param UpdateOp     Code generator for complex expressions that cannot
2283   ///                     be expressed through atomicrmw instruction.
2284   /// \param VolatileX    true if \a X is volatile.
2285   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2286   ///                     update expression, false otherwise.
2287   ///                     (e.g. true for X = X BinOp Expr)
2288   ///
2289   /// \returns A pair of the old value of X before the update, and the value
2290   ///          used for the update.
2291   std::pair<Value *, Value *>
2292   emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2293                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2294                    AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2295                    bool IsXBinopExpr);
2296 
2297   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
2298   ///
2299   /// \return The instruction
2300   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2301                                 AtomicRMWInst::BinOp RMWOp);
2302 
2303 public:
2304   /// A struct to pack relevant information while generating atomic ops.
2305   struct AtomicOpValue {
2306     Value *Var = nullptr;
2307     Type *ElemTy = nullptr;
2308     bool IsSigned = false;
2309     bool IsVolatile = false;
2310   };
2311 
2312   /// Emit atomic Read for : V = X --- Only Scalar data types.
2313   ///
2314   /// \param Loc    The insert and source location description.
2315   /// \param X      The target pointer to be atomically read
2316   /// \param V      Memory address where to store atomically read
2317   ///               value
2318   /// \param AO     Atomic ordering of the generated atomic
2319   ///               instructions.
2320   ///
2321   /// \return Insertion point after generated atomic read IR.
2322   InsertPointTy createAtomicRead(const LocationDescription &Loc,
2323                                  AtomicOpValue &X, AtomicOpValue &V,
2324                                  AtomicOrdering AO);
2325 
2326   /// Emit atomic write for : X = Expr --- Only Scalar data types.
2327   ///
2328   /// \param Loc    The insert and source location description.
2329   /// \param X      The target pointer to be atomically written to
2330   /// \param Expr   The value to store.
2331   /// \param AO     Atomic ordering of the generated atomic
2332   ///               instructions.
2333   ///
2334   /// \return Insertion point after generated atomic Write IR.
2335   InsertPointTy createAtomicWrite(const LocationDescription &Loc,
2336                                   AtomicOpValue &X, Value *Expr,
2337                                   AtomicOrdering AO);
2338 
2339   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
2340   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2341   /// Only Scalar data types.
2342   ///
2343   /// \param Loc      The insert and source location description.
2344   /// \param AllocaIP The insertion point to be used for alloca instructions.
2345   /// \param X        The target atomic pointer to be updated
2346   /// \param Expr     The value to update X with.
2347   /// \param AO       Atomic ordering of the generated atomic instructions.
2348   /// \param RMWOp    The binary operation used for update. If the operation
2349   ///                 is not supported by atomicRMW, or belongs to
2350   ///                 {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
2351   ///                 atomic will be generated.
2352   /// \param UpdateOp Code generator for complex expressions that cannot be
2353   ///                 expressed through atomicrmw instruction.
2354   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2355   ///                     update expression, false otherwise.
2356   ///                     (e.g. true for X = X BinOp Expr)
2357   ///
2358   /// \return Insertion point after generated atomic update IR.
2359   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
2360                                    InsertPointTy AllocaIP, AtomicOpValue &X,
2361                                    Value *Expr, AtomicOrdering AO,
2362                                    AtomicRMWInst::BinOp RMWOp,
2363                                    AtomicUpdateCallbackTy &UpdateOp,
2364                                    bool IsXBinopExpr);
2365 
2366   /// Emit atomic update for constructs: --- Only Scalar data types
2367   /// V = X; X = X BinOp Expr ,
2368   /// X = X BinOp Expr; V = X,
2369   /// V = X; X = Expr BinOp X,
2370   /// X = Expr BinOp X; V = X,
2371   /// V = X; X = UpdateOp(X),
2372   /// X = UpdateOp(X); V = X,
2373   ///
2374   /// \param Loc        The insert and source location description.
2375   /// \param AllocaIP   The insertion point to be used for alloca instructions.
2376   /// \param X          The target atomic pointer to be updated
2377   /// \param V          Memory address where to store captured value
2378   /// \param Expr       The value to update X with.
2379   /// \param AO         Atomic ordering of the generated atomic instructions
2380   /// \param RMWOp      The binary operation used for update. If the
2381   ///                   operation is not supported by atomicRMW, or belongs to
2382   ///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
2383   ///                   atomic will be generated.
2384   /// \param UpdateOp   Code generator for complex expressions that cannot be
2385   ///                   expressed through atomicrmw instruction.
2386   /// \param UpdateExpr true if X is an in place update of the form
2387   ///                   X = X BinOp Expr or X = Expr BinOp X
2388   /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2389   ///                        'v', not an updated one.
2390   /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2391   ///                     update expression, false otherwise.
2392   ///                     (e.g. true for X = X BinOp Expr)
2393   ///
2394   /// \return Insertion point after generated atomic capture IR.
2395   InsertPointTy
2396   createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
2397                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2398                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2399                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2400                       bool IsPostfixUpdate, bool IsXBinopExpr);
2401 
2402   /// Emit atomic compare for constructs: --- Only scalar data types
2403   /// cond-expr-stmt:
2404   /// x = x ordop expr ? expr : x;
2405   /// x = expr ordop x ? expr : x;
2406   /// x = x == e ? d : x;
2407   /// x = e == x ? d : x; (this one is not in the spec)
2408   /// cond-update-stmt:
2409   /// if (x ordop expr) { x = expr; }
2410   /// if (expr ordop x) { x = expr; }
2411   /// if (x == e) { x = d; }
2412   /// if (e == x) { x = d; } (this one is not in the spec)
2413   /// conditional-update-capture-atomic:
2414   /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2415   /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2416   /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2417   ///                                         IsFailOnly=true)
2418   /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2419   /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2420   ///                                                IsFailOnly=true)
2421   ///
2422   /// \param Loc          The insert and source location description.
2423   /// \param X            The target atomic pointer to be updated.
2424   /// \param V            Memory address where to store captured value (for
2425   ///                     compare capture only).
2426   /// \param R            Memory address where to store comparison result
2427   ///                     (for compare capture with '==' only).
2428   /// \param E            The expected value ('e') for forms that use an
2429   ///                     equality comparison or an expression ('expr') for
2430   ///                     forms that use 'ordop' (logically an atomic maximum or
2431   ///                     minimum).
2432   /// \param D            The desired value for forms that use an equality
2433   ///                     comparison. For forms that use 'ordop', it should be
2434   ///                     \p nullptr.
2435   /// \param AO           Atomic ordering of the generated atomic instructions.
2436   /// \param Op           Atomic compare operation. It can only be ==, <, or >.
2437   /// \param IsXBinopExpr True if the conditional statement is in the form where
2438   ///                     x is on LHS. It only matters for < or >.
2439   /// \param IsPostfixUpdate  True if original value of 'x' must be stored in
2440   ///                         'v', not an updated one (for compare capture
2441   ///                         only).
2442   /// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
2443   ///                     only when the comparison fails. This is only valid for
2444   ///                     the case the comparison is '=='.
2445   ///
2446   /// \return Insertion point after generated atomic compare IR.
2447   InsertPointTy
2448   createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
2449                       AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
2450                       AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
2451                       bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2452 
2453   /// Create the control flow structure of a canonical OpenMP loop.
2454   ///
2455   /// The emitted loop will be disconnected, i.e. no edge to the loop's
2456   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2457   /// IRBuilder location is not preserved.
2458   ///
2459   /// \param DL        DebugLoc used for the instructions in the skeleton.
2460   /// \param TripCount Value to be used for the trip count.
2461   /// \param F         Function in which to insert the BasicBlocks.
2462   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
2463   ///                         typically the body itself.
2464   /// \param PostInsertBefore Where to insert BBs that execute after the body.
2465   /// \param Name      Base name used to derive BasicBlock
2466   ///                  and instruction names.
2467   ///
2468   /// \returns The CanonicalLoopInfo that represents the emitted loop.
2469   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
2470                                         Function *F,
2471                                         BasicBlock *PreInsertBefore,
2472                                         BasicBlock *PostInsertBefore,
2473                                         const Twine &Name = {});
2474   /// Name string of the OMP offload info metadata ("omp_offload.info").
2475   const std::string ompOffloadInfoName = "omp_offload.info";
2476 
2477   /// Loads all the offload entries information from the host IR
2478   /// metadata. This function is only meant to be used with device code
2479   /// generation.
2480   ///
2481   /// \param M         Module to load Metadata info from. The module passed may
2482   /// be loaded from a bitcode file, i.e., differ from OpenMPIRBuilder::M.
2483   void loadOffloadInfoMetadata(Module &M);
2484 
2485   /// Gets the internal global variable named \p Name if it already exists,
2486   /// or creates it otherwise. A created variable has CommonLinkage by
2487   /// default and is initialized with a null value.
2488   /// \param Ty   Type of the variable; must match if it already exists.
2489   /// \param Name Name of the variable.
2490   /// \param AddressSpace Address space for the variable (defaults to 0).
2491   GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
2492                                               unsigned AddressSpace = 0);
2493 };
2494 
2495 /// Class to represented the control flow structure of an OpenMP canonical loop.
2496 ///
2497 /// The control-flow structure is standardized for easy consumption by
2498 /// directives associated with loops. For instance, the worksharing-loop
2499 /// construct may change this control flow such that each loop iteration is
2500 /// executed on only one thread. The constraints of a canonical loop in brief
2501 /// are:
2502 ///
2503 ///  * The number of loop iterations must have been computed before entering the
2504 ///    loop.
2505 ///
2506 ///  * Has an (unsigned) logical induction variable that starts at zero and
2507 ///    increments by one.
2508 ///
2509 ///  * The loop's CFG itself has no side-effects. The OpenMP specification
2510 ///    itself allows side-effects, but the order in which they happen, including
2511 ///    how often or whether at all, is unspecified. We expect that the frontend
2512 ///    will emit those side-effect instructions somewhere (e.g. before the loop)
2513 ///    such that the CanonicalLoopInfo itself can be side-effect free.
2514 ///
2515 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2516 /// execution of a loop body that satifies these constraints. It does NOT
2517 /// represent arbitrary SESE regions that happen to contain a loop. Do not use
2518 /// CanonicalLoopInfo for such purposes.
2519 ///
2520 /// The control flow can be described as follows:
2521 ///
2522 ///     Preheader
2523 ///        |
2524 ///  /-> Header
2525 ///  |     |
2526 ///  |    Cond---\
2527 ///  |     |     |
2528 ///  |    Body   |
2529 ///  |    | |    |
2530 ///  |   <...>   |
2531 ///  |    | |    |
2532 ///   \--Latch   |
2533 ///              |
2534 ///             Exit
2535 ///              |
2536 ///            After
2537 ///
2538 /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2539 /// including) and end at AfterIP (at the After's first instruction, excluding).
2540 /// That is, instructions in the Preheader and After blocks (except the
2541 /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2542 /// side-effects. Typically, the Preheader is used to compute the loop's trip
2543 /// count. The instructions from BodyIP (at the Body block's first instruction,
2544 /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2545 /// control and thus can have side-effects. The body block is the single entry
2546 /// point into the loop body, which may contain arbitrary control flow as long
2547 /// as all control paths eventually branch to the Latch block.
2548 ///
2549 /// TODO: Consider adding another standardized BasicBlock between Body CFG and
2550 /// Latch to guarantee that there is only a single edge to the latch. It would
2551 /// make loop transformations easier to not needing to consider multiple
2552 /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2553 /// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that
2554 /// executes after each body iteration.
2555 ///
2556 /// There must be no loop-carried dependencies through llvm::Values. This is
2557 /// equivalant to that the Latch has no PHINode and the Header's only PHINode is
2558 /// for the induction variable.
2559 ///
2560 /// All code in Header, Cond, Latch and Exit (plus the terminator of the
2561 /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2562 /// by assertOK(). They are expected to not be modified unless explicitly
2563 /// modifying the CanonicalLoopInfo through a methods that applies a OpenMP
2564 /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2565 /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2566 /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2567 /// anymore as its underlying control flow may not exist anymore.
2568 /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2569 /// may also return a new CanonicalLoopInfo that can be passed to other
2570 /// loop-associated construct implementing methods. These loop-transforming
2571 /// methods may either create a new CanonicalLoopInfo usually using
2572 /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2573 /// modify one of the input CanonicalLoopInfo and return it as representing the
2574 /// modified loop. What is done is an implementation detail of
2575 /// transformation-implementing method and callers should always assume that the
2576 /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2577 /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2578 /// created by createCanonicalLoop, such that transforming methods do not have
2579 /// to special case where the CanonicalLoopInfo originated from.
2580 ///
2581 /// Generally, methods consuming CanonicalLoopInfo do not need an
2582 /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2583 /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2584 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2585 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2586 /// any InsertPoint in the Preheader, After or Block can still be used after
2587 /// calling such a method.
2588 ///
2589 /// TODO: Provide mechanisms for exception handling and cancellation points.
2590 ///
2591 /// Defined outside OpenMPIRBuilder because nested classes cannot be
2592 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2593 class CanonicalLoopInfo {
2594   friend class OpenMPIRBuilder;
2595 
2596 private:
2597   BasicBlock *Header = nullptr;
2598   BasicBlock *Cond = nullptr;
2599   BasicBlock *Latch = nullptr;
2600   BasicBlock *Exit = nullptr;
2601 
2602   /// Add the control blocks of this loop to \p BBs.
2603   ///
2604   /// This does not include any block from the body, including the one returned
2605   /// by getBody().
2606   ///
2607   /// FIXME: This currently includes the Preheader and After blocks even though
2608   /// their content is (mostly) not under CanonicalLoopInfo's control.
2609   /// Re-evaluated whether this makes sense.
2610   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2611 
2612   /// Sets the number of loop iterations to the given value. This value must be
2613   /// valid in the condition block (i.e., defined in the preheader) and is
2614   /// interpreted as an unsigned integer.
2615   void setTripCount(Value *TripCount);
2616 
2617   /// Replace all uses of the canonical induction variable in the loop body with
2618   /// a new one.
2619   ///
2620   /// The intended use case is to update the induction variable for an updated
2621   /// iteration space such that it can stay normalized in the 0...tripcount-1
2622   /// range.
2623   ///
2624   /// The \p Updater is called with the (presumable updated) current normalized
2625   /// induction variable and is expected to return the value that uses of the
2626   /// pre-updated induction values should use instead, typically dependent on
2627   /// the new induction variable. This is a lambda (instead of e.g. just passing
2628   /// the new value) to be able to distinguish the uses of the pre-updated
2629   /// induction variable and uses of the induction varible to compute the
2630   /// updated induction variable value.
2631   void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2632 
2633 public:
2634   /// Returns whether this object currently represents the IR of a loop. If
2635   /// returning false, it may have been consumed by a loop transformation or not
2636   /// been intialized. Do not use in this case;
2637   bool isValid() const { return Header; }
2638 
2639   /// The preheader ensures that there is only a single edge entering the loop.
2640   /// Code that must be execute before any loop iteration can be emitted here,
2641   /// such as computing the loop trip count and begin lifetime markers. Code in
2642   /// the preheader is not considered part of the canonical loop.
2643   BasicBlock *getPreheader() const;
2644 
2645   /// The header is the entry for each iteration. In the canonical control flow,
2646   /// it only contains the PHINode for the induction variable.
2647   BasicBlock *getHeader() const {
2648     assert(isValid() && "Requires a valid canonical loop");
2649     return Header;
2650   }
2651 
2652   /// The condition block computes whether there is another loop iteration. If
2653   /// yes, branches to the body; otherwise to the exit block.
2654   BasicBlock *getCond() const {
2655     assert(isValid() && "Requires a valid canonical loop");
2656     return Cond;
2657   }
2658 
2659   /// The body block is the single entry for a loop iteration and not controlled
2660   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2661   /// eventually branch to the \p Latch block.
2662   BasicBlock *getBody() const {
2663     assert(isValid() && "Requires a valid canonical loop");
2664     return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2665   }
2666 
2667   /// Reaching the latch indicates the end of the loop body code. In the
2668   /// canonical control flow, it only contains the increment of the induction
2669   /// variable.
2670   BasicBlock *getLatch() const {
2671     assert(isValid() && "Requires a valid canonical loop");
2672     return Latch;
2673   }
2674 
2675   /// Reaching the exit indicates no more iterations are being executed.
2676   BasicBlock *getExit() const {
2677     assert(isValid() && "Requires a valid canonical loop");
2678     return Exit;
2679   }
2680 
2681   /// The after block is intended for clean-up code such as lifetime end
2682   /// markers. It is separate from the exit block to ensure, analogous to the
2683   /// preheader, it having just a single entry edge and being free from PHI
2684   /// nodes should there be multiple loop exits (such as from break
2685   /// statements/cancellations).
2686   BasicBlock *getAfter() const {
2687     assert(isValid() && "Requires a valid canonical loop");
2688     return Exit->getSingleSuccessor();
2689   }
2690 
2691   /// Returns the llvm::Value containing the number of loop iterations. It must
2692   /// be valid in the preheader and always interpreted as an unsigned integer of
2693   /// any bit-width.
2694   Value *getTripCount() const {
2695     assert(isValid() && "Requires a valid canonical loop");
2696     Instruction *CmpI = &Cond->front();
2697     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2698     return CmpI->getOperand(1);
2699   }
2700 
2701   /// Returns the instruction representing the current logical induction
2702   /// variable. Always unsigned, always starting at 0 with an increment of one.
2703   Instruction *getIndVar() const {
2704     assert(isValid() && "Requires a valid canonical loop");
2705     Instruction *IndVarPHI = &Header->front();
2706     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2707     return IndVarPHI;
2708   }
2709 
2710   /// Return the type of the induction variable (and the trip count).
2711   Type *getIndVarType() const {
2712     assert(isValid() && "Requires a valid canonical loop");
2713     return getIndVar()->getType();
2714   }
2715 
2716   /// Return the insertion point for user code before the loop.
2717   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
2718     assert(isValid() && "Requires a valid canonical loop");
2719     BasicBlock *Preheader = getPreheader();
2720     return {Preheader, std::prev(Preheader->end())};
2721   };
2722 
2723   /// Return the insertion point for user code in the body.
2724   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
2725     assert(isValid() && "Requires a valid canonical loop");
2726     BasicBlock *Body = getBody();
2727     return {Body, Body->begin()};
2728   };
2729 
2730   /// Return the insertion point for user code after the loop.
2731   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
2732     assert(isValid() && "Requires a valid canonical loop");
2733     BasicBlock *After = getAfter();
2734     return {After, After->begin()};
2735   };
2736 
2737   Function *getFunction() const {
2738     assert(isValid() && "Requires a valid canonical loop");
2739     return Header->getParent();
2740   }
2741 
2742   /// Consistency self-check.
2743   void assertOK() const;
2744 
2745   /// Invalidate this loop. That is, the underlying IR does not fulfill the
2746   /// requirements of an OpenMP canonical loop anymore.
2747   void invalidate();
2748 };
2749 
2750 } // end namespace llvm
2751 
2752 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
2753