1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
10 // way to create LLVM instructions for OpenMP directives.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16 
17 #include "llvm/Analysis/MemorySSAUpdater.h"
18 #include "llvm/Frontend/OpenMP/OMPConstants.h"
19 #include "llvm/IR/DebugLoc.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/Support/Allocator.h"
22 #include <forward_list>
23 #include <map>
24 #include <optional>
25 
26 namespace llvm {
27 class CanonicalLoopInfo;
28 struct TargetRegionEntryInfo;
29 class OffloadEntriesInfoManager;
30 
31 /// Move the instruction after an InsertPoint to the beginning of another
32 /// BasicBlock.
33 ///
34 /// The instructions after \p IP are moved to the beginning of \p New which must
35 /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
36 /// \p New will be added such that there is no semantic change. Otherwise, the
37 /// \p IP insert block remains degenerate and it is up to the caller to insert a
38 /// terminator.
39 void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
40               bool CreateBranch);
41 
42 /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
43 /// insert location will stick to after the instruction before the insertion
44 /// point (instead of moving with the instruction the InsertPoint stores
45 /// internally).
46 void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
47 
48 /// Split a BasicBlock at an InsertPoint, even if the block is degenerate
49 /// (missing the terminator).
50 ///
51 /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
52 /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
53 /// is true, a branch to the new successor will new created such that
54 /// semantically there is no change; otherwise the block of the insertion point
55 /// remains degenerate and it is the caller's responsibility to insert a
56 /// terminator. Returns the new successor block.
57 BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
58                     llvm::Twine Name = {});
59 
60 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
61 /// degenerate (missing the terminator).  Its new insert location will stick to
62 /// after the instruction before the insertion point (instead of moving with the
63 /// instruction the InsertPoint stores internally).
64 BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
65                     llvm::Twine Name = {});
66 
67 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
68 /// degenerate (missing the terminator).  Its new insert location will stick to
69 /// after the instruction before the insertion point (instead of moving with the
70 /// instruction the InsertPoint stores internally).
71 BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
72 
73 /// Like splitBB, but reuses the current block's name for the new name.
74 BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
75                               llvm::Twine Suffix = ".split");
76 
77 /// Captures attributes that affect generating LLVM-IR using the
78 /// OpenMPIRBuilder and related classes. Note that not all attributes are
79 /// required for all classes or functions. In some use cases the configuration
80 /// is not necessary at all, because because the only functions that are called
81 /// are ones that are not dependent on the configuration.
82 class OpenMPIRBuilderConfig {
83 public:
84   /// Flag for specifying if the compilation is done for embedded device code
85   /// or host code.
86   std::optional<bool> IsEmbedded;
87 
88   /// Flag for specifying if the compilation is done for an offloading target,
89   /// like GPU.
90   std::optional<bool> IsTargetCodegen;
91 
92   /// Flag for specifying weather a requires unified_shared_memory
93   /// directive is present or not.
94   std::optional<bool> HasRequiresUnifiedSharedMemory;
95 
96   // Flag for specifying if offloading is mandatory.
97   std::optional<bool> OpenMPOffloadMandatory;
98 
99   /// First separator used between the initial two parts of a name.
100   std::optional<StringRef> FirstSeparator;
101   /// Separator used between all of the rest consecutive parts of s name
102   std::optional<StringRef> Separator;
103 
OpenMPIRBuilderConfig()104   OpenMPIRBuilderConfig() {}
OpenMPIRBuilderConfig(bool IsEmbedded,bool IsTargetCodegen,bool HasRequiresUnifiedSharedMemory,bool OpenMPOffloadMandatory)105   OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen,
106                         bool HasRequiresUnifiedSharedMemory,
107                         bool OpenMPOffloadMandatory)
108       : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen),
109         HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory),
110         OpenMPOffloadMandatory(OpenMPOffloadMandatory) {}
111 
112   // Getters functions that assert if the required values are not present.
isEmbedded()113   bool isEmbedded() const {
114     assert(IsEmbedded.has_value() && "IsEmbedded is not set");
115     return *IsEmbedded;
116   }
117 
isTargetCodegen()118   bool isTargetCodegen() const {
119     assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set");
120     return *IsTargetCodegen;
121   }
122 
hasRequiresUnifiedSharedMemory()123   bool hasRequiresUnifiedSharedMemory() const {
124     assert(HasRequiresUnifiedSharedMemory.has_value() &&
125            "HasUnifiedSharedMemory is not set");
126     return *HasRequiresUnifiedSharedMemory;
127   }
128 
openMPOffloadMandatory()129   bool openMPOffloadMandatory() const {
130     assert(OpenMPOffloadMandatory.has_value() &&
131            "OpenMPOffloadMandatory is not set");
132     return *OpenMPOffloadMandatory;
133   }
134   // Returns the FirstSeparator if set, otherwise use the default
135   // separator depending on isTargetCodegen
firstSeparator()136   StringRef firstSeparator() const {
137     if (FirstSeparator.has_value())
138       return *FirstSeparator;
139     if (isTargetCodegen())
140       return "_";
141     return ".";
142   }
143 
144   // Returns the Separator if set, otherwise use the default
145   // separator depending on isTargetCodegen
separator()146   StringRef separator() const {
147     if (Separator.has_value())
148       return *Separator;
149     if (isTargetCodegen())
150       return "$";
151     return ".";
152   }
153 
setIsEmbedded(bool Value)154   void setIsEmbedded(bool Value) { IsEmbedded = Value; }
setIsTargetCodegen(bool Value)155   void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; }
setHasRequiresUnifiedSharedMemory(bool Value)156   void setHasRequiresUnifiedSharedMemory(bool Value) {
157     HasRequiresUnifiedSharedMemory = Value;
158   }
setFirstSeparator(StringRef FS)159   void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
setSeparator(StringRef S)160   void setSeparator(StringRef S) { Separator = S; }
161 };
162 
163 /// An interface to create LLVM-IR for OpenMP directives.
164 ///
165 /// Each OpenMP directive has a corresponding public generator method.
166 class OpenMPIRBuilder {
167 public:
168   /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
169   /// not have an effect on \p M (see initialize)
OpenMPIRBuilder(Module & M)170   OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
171   ~OpenMPIRBuilder();
172 
173   /// Initialize the internal state, this will put structures types and
174   /// potentially other helpers into the underlying module. Must be called
175   /// before any other method and only once!
176   void initialize();
177 
setConfig(OpenMPIRBuilderConfig C)178   void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
179 
180   /// Finalize the underlying module, e.g., by outlining regions.
181   /// \param Fn                    The function to be finalized. If not used,
182   ///                              all functions are finalized.
183   void finalize(Function *Fn = nullptr);
184 
185   /// Add attributes known for \p FnID to \p Fn.
186   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
187 
188   /// Type used throughout for insertion points.
189   using InsertPointTy = IRBuilder<>::InsertPoint;
190 
191   /// Get the create a name using the platform specific separators.
192   /// \param Parts parts of the final name that needs separation
193   /// The created name has a first separator between the first and second part
194   /// and a second separator between all other parts.
195   /// E.g. with FirstSeparator "$" and Separator "." and
196   /// parts: "p1", "p2", "p3", "p4"
197   /// The resulting name is "p1$p2.p3.p4"
198   /// The separators are retrieved from the OpenMPIRBuilderConfig.
199   std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
200 
201   /// Callback type for variable finalization (think destructors).
202   ///
203   /// \param CodeGenIP is the insertion point at which the finalization code
204   ///                  should be placed.
205   ///
206   /// A finalize callback knows about all objects that need finalization, e.g.
207   /// destruction, when the scope of the currently generated construct is left
208   /// at the time, and location, the callback is invoked.
209   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
210 
211   struct FinalizationInfo {
212     /// The finalization callback provided by the last in-flight invocation of
213     /// createXXXX for the directive of kind DK.
214     FinalizeCallbackTy FiniCB;
215 
216     /// The directive kind of the innermost directive that has an associated
217     /// region which might require finalization when it is left.
218     omp::Directive DK;
219 
220     /// Flag to indicate if the directive is cancellable.
221     bool IsCancellable;
222   };
223 
224   /// Push a finalization callback on the finalization stack.
225   ///
226   /// NOTE: Temporary solution until Clang CG is gone.
pushFinalizationCB(const FinalizationInfo & FI)227   void pushFinalizationCB(const FinalizationInfo &FI) {
228     FinalizationStack.push_back(FI);
229   }
230 
231   /// Pop the last finalization callback from the finalization stack.
232   ///
233   /// NOTE: Temporary solution until Clang CG is gone.
popFinalizationCB()234   void popFinalizationCB() { FinalizationStack.pop_back(); }
235 
236   /// Callback type for body (=inner region) code generation
237   ///
238   /// The callback takes code locations as arguments, each describing a
239   /// location where additional instructions can be inserted.
240   ///
241   /// The CodeGenIP may be in the middle of a basic block or point to the end of
242   /// it. The basic block may have a terminator or be degenerate. The callback
243   /// function may just insert instructions at that position, but also split the
244   /// block (without the Before argument of BasicBlock::splitBasicBlock such
245   /// that the identify of the split predecessor block is preserved) and insert
246   /// additional control flow, including branches that do not lead back to what
247   /// follows the CodeGenIP. Note that since the callback is allowed to split
248   /// the block, callers must assume that InsertPoints to positions in the
249   /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
250   /// such InsertPoints need to be preserved, it can split the block itself
251   /// before calling the callback.
252   ///
253   /// AllocaIP and CodeGenIP must not point to the same position.
254   ///
255   /// \param AllocaIP is the insertion point at which new alloca instructions
256   ///                 should be placed. The BasicBlock it is pointing to must
257   ///                 not be split.
258   /// \param CodeGenIP is the insertion point at which the body code should be
259   ///                  placed.
260   using BodyGenCallbackTy =
261       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
262 
263   // This is created primarily for sections construct as llvm::function_ref
264   // (BodyGenCallbackTy) is not storable (as described in the comments of
265   // function_ref class - function_ref contains non-ownable reference
266   // to the callable.
267   using StorableBodyGenCallbackTy =
268       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
269 
270   /// Callback type for loop body code generation.
271   ///
272   /// \param CodeGenIP is the insertion point where the loop's body code must be
273   ///                  placed. This will be a dedicated BasicBlock with a
274   ///                  conditional branch from the loop condition check and
275   ///                  terminated with an unconditional branch to the loop
276   ///                  latch.
277   /// \param IndVar    is the induction variable usable at the insertion point.
278   using LoopBodyGenCallbackTy =
279       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
280 
281   /// Callback type for variable privatization (think copy & default
282   /// constructor).
283   ///
284   /// \param AllocaIP is the insertion point at which new alloca instructions
285   ///                 should be placed.
286   /// \param CodeGenIP is the insertion point at which the privatization code
287   ///                  should be placed.
288   /// \param Original The value being copied/created, should not be used in the
289   ///                 generated IR.
290   /// \param Inner The equivalent of \p Original that should be used in the
291   ///              generated IR; this is equal to \p Original if the value is
292   ///              a pointer and can thus be passed directly, otherwise it is
293   ///              an equivalent but different value.
294   /// \param ReplVal The replacement value, thus a copy or new created version
295   ///                of \p Inner.
296   ///
297   /// \returns The new insertion point where code generation continues and
298   ///          \p ReplVal the replacement value.
299   using PrivatizeCallbackTy = function_ref<InsertPointTy(
300       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
301       Value &Inner, Value *&ReplVal)>;
302 
303   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
304   /// (filename, line, column, ...).
305   struct LocationDescription {
LocationDescriptionLocationDescription306     LocationDescription(const IRBuilderBase &IRB)
307         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
LocationDescriptionLocationDescription308     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
LocationDescriptionLocationDescription309     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
310         : IP(IP), DL(DL) {}
311     InsertPointTy IP;
312     DebugLoc DL;
313   };
314 
315   /// Emitter methods for OpenMP directives.
316   ///
317   ///{
318 
319   /// Generator for '#omp barrier'
320   ///
321   /// \param Loc The location where the barrier directive was encountered.
322   /// \param DK The kind of directive that caused the barrier.
323   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
324   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
325   ///                        should be checked and acted upon.
326   ///
327   /// \returns The insertion point after the barrier.
328   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
329                               bool ForceSimpleCall = false,
330                               bool CheckCancelFlag = true);
331 
332   /// Generator for '#omp cancel'
333   ///
334   /// \param Loc The location where the directive was encountered.
335   /// \param IfCondition The evaluated 'if' clause expression, if any.
336   /// \param CanceledDirective The kind of directive that is cancled.
337   ///
338   /// \returns The insertion point after the barrier.
339   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
340                              omp::Directive CanceledDirective);
341 
342   /// Generator for '#omp parallel'
343   ///
344   /// \param Loc The insert and source location description.
345   /// \param AllocaIP The insertion points to be used for alloca instructions.
346   /// \param BodyGenCB Callback that will generate the region code.
347   /// \param PrivCB Callback to copy a given variable (think copy constructor).
348   /// \param FiniCB Callback to finalize variable copies.
349   /// \param IfCondition The evaluated 'if' clause expression, if any.
350   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
351   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
352   /// \param IsCancellable Flag to indicate a cancellable parallel region.
353   ///
354   /// \returns The insertion position *after* the parallel.
355   IRBuilder<>::InsertPoint
356   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
357                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
358                  FinalizeCallbackTy FiniCB, Value *IfCondition,
359                  Value *NumThreads, omp::ProcBindKind ProcBind,
360                  bool IsCancellable);
361 
362   /// Generator for the control flow structure of an OpenMP canonical loop.
363   ///
364   /// This generator operates on the logical iteration space of the loop, i.e.
365   /// the caller only has to provide a loop trip count of the loop as defined by
366   /// base language semantics. The trip count is interpreted as an unsigned
367   /// integer. The induction variable passed to \p BodyGenCB will be of the same
368   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
369   /// convert the logical iteration variable to the loop counter variable in the
370   /// loop body.
371   ///
372   /// \param Loc       The insert and source location description. The insert
373   ///                  location can be between two instructions or the end of a
374   ///                  degenerate block (e.g. a BB under construction).
375   /// \param BodyGenCB Callback that will generate the loop body code.
376   /// \param TripCount Number of iterations the loop body is executed.
377   /// \param Name      Base name used to derive BB and instruction names.
378   ///
379   /// \returns An object representing the created control flow structure which
380   ///          can be used for loop-associated directives.
381   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
382                                          LoopBodyGenCallbackTy BodyGenCB,
383                                          Value *TripCount,
384                                          const Twine &Name = "loop");
385 
386   /// Generator for the control flow structure of an OpenMP canonical loop.
387   ///
388   /// Instead of a logical iteration space, this allows specifying user-defined
389   /// loop counter values using increment, upper- and lower bounds. To
390   /// disambiguate the terminology when counting downwards, instead of lower
391   /// bounds we use \p Start for the loop counter value in the first body
392   /// iteration.
393   ///
394   /// Consider the following limitations:
395   ///
396   ///  * A loop counter space over all integer values of its bit-width cannot be
397   ///    represented. E.g using uint8_t, its loop trip count of 256 cannot be
398   ///    stored into an 8 bit integer):
399   ///
400   ///      DO I = 0, 255, 1
401   ///
402   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
403   ///    effectively counting downwards:
404   ///
405   ///      for (uint8_t i = 100u; i > 0; i += 127u)
406   ///
407   ///
408   /// TODO: May need to add additional parameters to represent:
409   ///
410   ///  * Allow representing downcounting with unsigned integers.
411   ///
412   ///  * Sign of the step and the comparison operator might disagree:
413   ///
414   ///      for (int i = 0; i < 42; i -= 1u)
415   ///
416   //
417   /// \param Loc       The insert and source location description.
418   /// \param BodyGenCB Callback that will generate the loop body code.
419   /// \param Start     Value of the loop counter for the first iterations.
420   /// \param Stop      Loop counter values past this will stop the loop.
421   /// \param Step      Loop counter increment after each iteration; negative
422   ///                  means counting down.
423   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
424   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
425   ///                      counter.
426   /// \param ComputeIP Insertion point for instructions computing the trip
427   ///                  count. Can be used to ensure the trip count is available
428   ///                  at the outermost loop of a loop nest. If not set,
429   ///                  defaults to the preheader of the generated loop.
430   /// \param Name      Base name used to derive BB and instruction names.
431   ///
432   /// \returns An object representing the created control flow structure which
433   ///          can be used for loop-associated directives.
434   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
435                                          LoopBodyGenCallbackTy BodyGenCB,
436                                          Value *Start, Value *Stop, Value *Step,
437                                          bool IsSigned, bool InclusiveStop,
438                                          InsertPointTy ComputeIP = {},
439                                          const Twine &Name = "loop");
440 
441   /// Collapse a loop nest into a single loop.
442   ///
443   /// Merges loops of a loop nest into a single CanonicalLoopNest representation
444   /// that has the same number of innermost loop iterations as the origin loop
445   /// nest. The induction variables of the input loops are derived from the
446   /// collapsed loop's induction variable. This is intended to be used to
447   /// implement OpenMP's collapse clause. Before applying a directive,
448   /// collapseLoops normalizes a loop nest to contain only a single loop and the
449   /// directive's implementation does not need to handle multiple loops itself.
450   /// This does not remove the need to handle all loop nest handling by
451   /// directives, such as the ordered(<n>) clause or the simd schedule-clause
452   /// modifier of the worksharing-loop directive.
453   ///
454   /// Example:
455   /// \code
456   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
457   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
458   ///       body(i, j);
459   /// \endcode
460   ///
461   /// After collapsing with Loops={i,j}, the loop is changed to
462   /// \code
463   ///   for (int ij = 0; ij < 63; ++ij) {
464   ///     int i = ij / 9;
465   ///     int j = ij % 9;
466   ///     body(i, j);
467   ///   }
468   /// \endcode
469   ///
470   /// In the current implementation, the following limitations apply:
471   ///
472   ///  * All input loops have an induction variable of the same type.
473   ///
474   ///  * The collapsed loop will have the same trip count integer type as the
475   ///    input loops. Therefore it is possible that the collapsed loop cannot
476   ///    represent all iterations of the input loops. For instance, assuming a
477   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
478   ///    theoretical trip count of the collapsed loop would be 2^32 iteration,
479   ///    which cannot be represented in an 32-bit integer. Behavior is undefined
480   ///    in this case.
481   ///
482   ///  * The trip counts of every input loop must be available at \p ComputeIP.
483   ///    Non-rectangular loops are not yet supported.
484   ///
485   ///  * At each nest level, code between a surrounding loop and its nested loop
486   ///    is hoisted into the loop body, and such code will be executed more
487   ///    often than before collapsing (or not at all if any inner loop iteration
488   ///    has a trip count of 0). This is permitted by the OpenMP specification.
489   ///
490   /// \param DL        Debug location for instructions added for collapsing,
491   ///                  such as instructions to compute/derive the input loop's
492   ///                  induction variables.
493   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
494   ///                  from outermost-to-innermost and every control flow of a
495   ///                  loop's body must pass through its directly nested loop.
496   /// \param ComputeIP Where additional instruction that compute the collapsed
497   ///                  trip count. If not set, defaults to before the generated
498   ///                  loop.
499   ///
500   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
501   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
502                                    ArrayRef<CanonicalLoopInfo *> Loops,
503                                    InsertPointTy ComputeIP);
504 
505 private:
506   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
507   ///
508   /// This takes a \p LoopInfo representing a canonical loop, such as the one
509   /// created by \p createCanonicalLoop and emits additional instructions to
510   /// turn it into a workshare loop. In particular, it calls to an OpenMP
511   /// runtime function in the preheader to obtain the loop bounds to be used in
512   /// the current thread, updates the relevant instructions in the canonical
513   /// loop and calls to an OpenMP runtime finalization function after the loop.
514   ///
515   /// \param DL       Debug location for instructions added for the
516   ///                 workshare-loop construct itself.
517   /// \param CLI      A descriptor of the canonical loop to workshare.
518   /// \param AllocaIP An insertion point for Alloca instructions usable in the
519   ///                 preheader of the loop.
520   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
521   ///                     the loop.
522   ///
523   /// \returns Point where to insert code after the workshare construct.
524   InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
525                                          InsertPointTy AllocaIP,
526                                          bool NeedsBarrier);
527 
528   /// Modifies the canonical loop a statically-scheduled workshare loop with a
529   /// user-specified chunk size.
530   ///
531   /// \param DL           Debug location for instructions added for the
532   ///                     workshare-loop construct itself.
533   /// \param CLI          A descriptor of the canonical loop to workshare.
534   /// \param AllocaIP     An insertion point for Alloca instructions usable in
535   ///                     the preheader of the loop.
536   /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
537   ///                     loop.
538   /// \param ChunkSize    The user-specified chunk size.
539   ///
540   /// \returns Point where to insert code after the workshare construct.
541   InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
542                                                 CanonicalLoopInfo *CLI,
543                                                 InsertPointTy AllocaIP,
544                                                 bool NeedsBarrier,
545                                                 Value *ChunkSize);
546 
547   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
548   ///
549   /// This takes a \p LoopInfo representing a canonical loop, such as the one
550   /// created by \p createCanonicalLoop and emits additional instructions to
551   /// turn it into a workshare loop. In particular, it calls to an OpenMP
552   /// runtime function in the preheader to obtain, and then in each iteration
553   /// to update the loop counter.
554   ///
555   /// \param DL       Debug location for instructions added for the
556   ///                 workshare-loop construct itself.
557   /// \param CLI      A descriptor of the canonical loop to workshare.
558   /// \param AllocaIP An insertion point for Alloca instructions usable in the
559   ///                 preheader of the loop.
560   /// \param SchedType Type of scheduling to be passed to the init function.
561   /// \param NeedsBarrier Indicates whether a barrier must be insterted after
562   ///                     the loop.
563   /// \param Chunk    The size of loop chunk considered as a unit when
564   ///                 scheduling. If \p nullptr, defaults to 1.
565   ///
566   /// \returns Point where to insert code after the workshare construct.
567   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
568                                           InsertPointTy AllocaIP,
569                                           omp::OMPScheduleType SchedType,
570                                           bool NeedsBarrier,
571                                           Value *Chunk = nullptr);
572 
573   /// Create alternative version of the loop to support if clause
574   ///
575   /// OpenMP if clause can require to generate second loop. This loop
576   /// will be executed when if clause condition is not met. createIfVersion
577   /// adds branch instruction to the copied loop if \p  ifCond is not met.
578   ///
579   /// \param Loop       Original loop which should be versioned.
580   /// \param IfCond     Value which corresponds to if clause condition
581   /// \param VMap       Value to value map to define relation between
582   ///                   original and copied loop values and loop blocks.
583   /// \param NamePrefix Optional name prefix for if.then if.else blocks.
584   void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
585                        ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
586 
587 public:
588   /// Modifies the canonical loop to be a workshare loop.
589   ///
590   /// This takes a \p LoopInfo representing a canonical loop, such as the one
591   /// created by \p createCanonicalLoop and emits additional instructions to
592   /// turn it into a workshare loop. In particular, it calls to an OpenMP
593   /// runtime function in the preheader to obtain the loop bounds to be used in
594   /// the current thread, updates the relevant instructions in the canonical
595   /// loop and calls to an OpenMP runtime finalization function after the loop.
596   ///
597   /// The concrete transformation is done by applyStaticWorkshareLoop,
598   /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
599   /// on the value of \p SchedKind and \p ChunkSize.
600   ///
601   /// \param DL       Debug location for instructions added for the
602   ///                 workshare-loop construct itself.
603   /// \param CLI      A descriptor of the canonical loop to workshare.
604   /// \param AllocaIP An insertion point for Alloca instructions usable in the
605   ///                 preheader of the loop.
606   /// \param NeedsBarrier Indicates whether a barrier must be insterted after
607   ///                     the loop.
608   /// \param SchedKind Scheduling algorithm to use.
609   /// \param ChunkSize The chunk size for the inner loop.
610   /// \param HasSimdModifier Whether the simd modifier is present in the
611   ///                        schedule clause.
612   /// \param HasMonotonicModifier Whether the monotonic modifier is present in
613   ///                             the schedule clause.
614   /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
615   ///                                present in the schedule clause.
616   /// \param HasOrderedClause Whether the (parameterless) ordered clause is
617   ///                         present.
618   ///
619   /// \returns Point where to insert code after the workshare construct.
620   InsertPointTy applyWorkshareLoop(
621       DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
622       bool NeedsBarrier,
623       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
624       Value *ChunkSize = nullptr, bool HasSimdModifier = false,
625       bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
626       bool HasOrderedClause = false);
627 
628   /// Tile a loop nest.
629   ///
630   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
631   /// \p/ Loops must be perfectly nested, from outermost to innermost loop
632   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
633   /// of every loop and every tile sizes must be usable in the outermost
634   /// loop's preheader. This implies that the loop nest is rectangular.
635   ///
636   /// Example:
637   /// \code
638   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
639   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
640   ///         body(i, j);
641   /// \endcode
642   ///
643   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
644   /// \code
645   ///   for (int i1 = 0; i1 < 3; ++i1)
646   ///     for (int j1 = 0; j1 < 2; ++j1)
647   ///       for (int i2 = 0; i2 < 5; ++i2)
648   ///         for (int j2 = 0; j2 < 7; ++j2)
649   ///           body(i1*3+i2, j1*3+j2);
650   /// \endcode
651   ///
652   /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are
653   /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also
654   /// handles non-constant trip counts, non-constant tile sizes and trip counts
655   /// that are not multiples of the tile size. In the latter case the tile loop
656   /// of the last floor-loop iteration will have fewer iterations than specified
657   /// as its tile size.
658   ///
659   ///
660   /// @param DL        Debug location for instructions added by tiling, for
661   ///                  instance the floor- and tile trip count computation.
662   /// @param Loops     Loops to tile. The CanonicalLoopInfo objects are
663   ///                  invalidated by this method, i.e. should not used after
664   ///                  tiling.
665   /// @param TileSizes For each loop in \p Loops, the tile size for that
666   ///                  dimensions.
667   ///
668   /// \returns A list of generated loops. Contains twice as many loops as the
669   ///          input loop nest; the first half are the floor loops and the
670   ///          second half are the tile loops.
671   std::vector<CanonicalLoopInfo *>
672   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
673             ArrayRef<Value *> TileSizes);
674 
675   /// Fully unroll a loop.
676   ///
677   /// Instead of unrolling the loop immediately (and duplicating its body
678   /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
679   /// metadata.
680   ///
681   /// \param DL   Debug location for instructions added by unrolling.
682   /// \param Loop The loop to unroll. The loop will be invalidated.
683   void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
684 
685   /// Fully or partially unroll a loop. How the loop is unrolled is determined
686   /// using LLVM's LoopUnrollPass.
687   ///
688   /// \param DL   Debug location for instructions added by unrolling.
689   /// \param Loop The loop to unroll. The loop will be invalidated.
690   void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
691 
692   /// Partially unroll a loop.
693   ///
694   /// The CanonicalLoopInfo of the unrolled loop for use with chained
695   /// loop-associated directive can be requested using \p UnrolledCLI. Not
696   /// needing the CanonicalLoopInfo allows more efficient code generation by
697   /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
698   /// A loop-associated directive applied to the unrolled loop needs to know the
699   /// new trip count which means that if using a heuristically determined unroll
700   /// factor (\p Factor == 0), that factor must be computed immediately. We are
701   /// using the same logic as the LoopUnrollPass to derived the unroll factor,
702   /// but which assumes that some canonicalization has taken place (e.g.
703   /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
704   /// better when the unrolled loop's CanonicalLoopInfo is not needed.
705   ///
706   /// \param DL          Debug location for instructions added by unrolling.
707   /// \param Loop        The loop to unroll. The loop will be invalidated.
708   /// \param Factor      The factor to unroll the loop by. A factor of 0
709   ///                    indicates that a heuristic should be used to determine
710   ///                    the unroll-factor.
711   /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
712   ///                    partially unrolled loop. Otherwise, uses loop metadata
713   ///                    to defer unrolling to the LoopUnrollPass.
714   void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
715                          CanonicalLoopInfo **UnrolledCLI);
716 
717   /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
718   /// is cloned. The metadata which prevents vectorization is added to
719   /// to the cloned loop. The cloned loop is executed when ifCond is evaluated
720   /// to false.
721   ///
722   /// \param Loop        The loop to simd-ize.
723   /// \param AlignedVars The map which containts pairs of the pointer
724   ///                    and its corresponding alignment.
725   /// \param IfCond      The value which corresponds to the if clause
726   ///                    condition.
727   /// \param Order       The enum to map order clause.
728   /// \param Simdlen     The Simdlen length to apply to the simd loop.
729   /// \param Safelen     The Safelen length to apply to the simd loop.
730   void applySimd(CanonicalLoopInfo *Loop,
731                  MapVector<Value *, Value *> AlignedVars, Value *IfCond,
732                  omp::OrderKind Order, ConstantInt *Simdlen,
733                  ConstantInt *Safelen);
734 
735   /// Generator for '#omp flush'
736   ///
737   /// \param Loc The location where the flush directive was encountered
738   void createFlush(const LocationDescription &Loc);
739 
740   /// Generator for '#omp taskwait'
741   ///
742   /// \param Loc The location where the taskwait directive was encountered.
743   void createTaskwait(const LocationDescription &Loc);
744 
745   /// Generator for '#omp taskyield'
746   ///
747   /// \param Loc The location where the taskyield directive was encountered.
748   void createTaskyield(const LocationDescription &Loc);
749 
750   /// A struct to pack the relevant information for an OpenMP depend clause.
751   struct DependData {
752     omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
753     Type *DepValueType;
754     Value *DepVal;
755     explicit DependData() = default;
DependDataDependData756     DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
757                Value *DepVal)
758         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
759   };
760 
761   /// Generator for `#omp task`
762   ///
763   /// \param Loc The location where the task construct was encountered.
764   /// \param AllocaIP The insertion point to be used for alloca instructions.
765   /// \param BodyGenCB Callback that will generate the region code.
766   /// \param Tied True if the task is tied, false if the task is untied.
767   /// \param Final i1 value which is `true` if the task is final, `false` if the
768   ///              task is not final.
769   /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
770   ///                    task is generated, and the encountering thread must
771   ///                    suspend the current task region, for which execution
772   ///                    cannot be resumed until execution of the structured
773   ///                    block that is associated with the generated task is
774   ///                    completed.
775   InsertPointTy createTask(const LocationDescription &Loc,
776                            InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
777                            bool Tied = true, Value *Final = nullptr,
778                            Value *IfCondition = nullptr,
779                            SmallVector<DependData> Dependencies = {});
780 
781   /// Generator for the taskgroup construct
782   ///
783   /// \param Loc The location where the taskgroup construct was encountered.
784   /// \param AllocaIP The insertion point to be used for alloca instructions.
785   /// \param BodyGenCB Callback that will generate the region code.
786   InsertPointTy createTaskgroup(const LocationDescription &Loc,
787                                 InsertPointTy AllocaIP,
788                                 BodyGenCallbackTy BodyGenCB);
789 
790   /// Functions used to generate reductions. Such functions take two Values
791   /// representing LHS and RHS of the reduction, respectively, and a reference
792   /// to the value that is updated to refer to the reduction result.
793   using ReductionGenTy =
794       function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
795 
796   /// Functions used to generate atomic reductions. Such functions take two
797   /// Values representing pointers to LHS and RHS of the reduction, as well as
798   /// the element type of these pointers. They are expected to atomically
799   /// update the LHS to the reduced value.
800   using AtomicReductionGenTy =
801       function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
802 
803   /// Information about an OpenMP reduction.
804   struct ReductionInfo {
ReductionInfoReductionInfo805     ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
806                   ReductionGenTy ReductionGen,
807                   AtomicReductionGenTy AtomicReductionGen)
808         : ElementType(ElementType), Variable(Variable),
809           PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
810           AtomicReductionGen(AtomicReductionGen) {
811       assert(cast<PointerType>(Variable->getType())
812           ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
813     }
814 
815     /// Reduction element type, must match pointee type of variable.
816     Type *ElementType;
817 
818     /// Reduction variable of pointer type.
819     Value *Variable;
820 
821     /// Thread-private partial reduction variable.
822     Value *PrivateVariable;
823 
824     /// Callback for generating the reduction body. The IR produced by this will
825     /// be used to combine two values in a thread-safe context, e.g., under
826     /// lock or within the same thread, and therefore need not be atomic.
827     ReductionGenTy ReductionGen;
828 
829     /// Callback for generating the atomic reduction body, may be null. The IR
830     /// produced by this will be used to atomically combine two values during
831     /// reduction. If null, the implementation will use the non-atomic version
832     /// along with the appropriate synchronization mechanisms.
833     AtomicReductionGenTy AtomicReductionGen;
834   };
835 
836   // TODO: provide atomic and non-atomic reduction generators for reduction
837   // operators defined by the OpenMP specification.
838 
839   /// Generator for '#omp reduction'.
840   ///
841   /// Emits the IR instructing the runtime to perform the specific kind of
842   /// reductions. Expects reduction variables to have been privatized and
843   /// initialized to reduction-neutral values separately. Emits the calls to
844   /// runtime functions as well as the reduction function and the basic blocks
845   /// performing the reduction atomically and non-atomically.
846   ///
847   /// The code emitted for the following:
848   ///
849   /// \code
850   ///   type var_1;
851   ///   type var_2;
852   ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
853   ///   /* body */;
854   /// \endcode
855   ///
856   /// corresponds to the following sketch.
857   ///
858   /// \code
859   /// void _outlined_par() {
860   ///   // N is the number of different reductions.
861   ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
862   ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
863   ///                        _omp_reduction_func,
864   ///                        _gomp_critical_user.reduction.var)) {
865   ///   case 1: {
866   ///     var_1 = var_1 <reduction-op> privatized_var_1;
867   ///     var_2 = var_2 <reduction-op> privatized_var_2;
868   ///     // ...
869   ///    __kmpc_end_reduce(...);
870   ///     break;
871   ///   }
872   ///   case 2: {
873   ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
874   ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
875   ///     // ...
876   ///     break;
877   ///   }
878   ///   default: break;
879   ///   }
880   /// }
881   ///
882   /// void _omp_reduction_func(void **lhs, void **rhs) {
883   ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
884   ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
885   ///   // ...
886   /// }
887   /// \endcode
888   ///
889   /// \param Loc                The location where the reduction was
890   ///                           encountered. Must be within the associate
891   ///                           directive and after the last local access to the
892   ///                           reduction variables.
893   /// \param AllocaIP           An insertion point suitable for allocas usable
894   ///                           in reductions.
895   /// \param ReductionInfos     A list of info on each reduction variable.
896   /// \param IsNoWait           A flag set if the reduction is marked as nowait.
897   InsertPointTy createReductions(const LocationDescription &Loc,
898                                  InsertPointTy AllocaIP,
899                                  ArrayRef<ReductionInfo> ReductionInfos,
900                                  bool IsNoWait = false);
901 
902   ///}
903 
904   /// Return the insertion point used by the underlying IRBuilder.
getInsertionPoint()905   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
906 
907   /// Update the internal location to \p Loc.
updateToLocation(const LocationDescription & Loc)908   bool updateToLocation(const LocationDescription &Loc) {
909     Builder.restoreIP(Loc.IP);
910     Builder.SetCurrentDebugLocation(Loc.DL);
911     return Loc.IP.getBlock() != nullptr;
912   }
913 
914   /// Return the function declaration for the runtime function with \p FnID.
915   FunctionCallee getOrCreateRuntimeFunction(Module &M,
916                                             omp::RuntimeFunction FnID);
917 
918   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
919 
920   /// Return the (LLVM-IR) string describing the source location \p LocStr.
921   Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
922 
923   /// Return the (LLVM-IR) string describing the default source location.
924   Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
925 
926   /// Return the (LLVM-IR) string describing the source location identified by
927   /// the arguments.
928   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
929                                  unsigned Line, unsigned Column,
930                                  uint32_t &SrcLocStrSize);
931 
932   /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
933   /// fallback if \p DL does not specify the function name.
934   Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
935                                  Function *F = nullptr);
936 
937   /// Return the (LLVM-IR) string describing the source location \p Loc.
938   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
939                                  uint32_t &SrcLocStrSize);
940 
941   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
942   /// TODO: Create a enum class for the Reserve2Flags
943   Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
944                              omp::IdentFlag Flags = omp::IdentFlag(0),
945                              unsigned Reserve2Flags = 0);
946 
947   /// Create a hidden global flag \p Name in the module with initial value \p
948   /// Value.
949   GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
950 
951   /// Create an offloading section struct used to register this global at
952   /// runtime.
953   ///
954   /// Type struct __tgt_offload_entry{
955   ///   void    *addr;      // Pointer to the offload entry info.
956   ///                       // (function or global)
957   ///   char    *name;      // Name of the function or global.
958   ///   size_t  size;       // Size of the entry info (0 if it a function).
959   ///   int32_t flags;
960   ///   int32_t reserved;
961   /// };
962   ///
963   /// \param Addr The pointer to the global being registered.
964   /// \param Name The symbol name associated with the global.
965   /// \param Size The size in bytes of the global (0 for functions).
966   /// \param Flags Flags associated with the entry.
967   /// \param SectionName The section this entry will be placed at.
968   void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
969                            int32_t Flags,
970                            StringRef SectionName = "omp_offloading_entries");
971 
972   /// Generate control flow and cleanup for cancellation.
973   ///
974   /// \param CancelFlag Flag indicating if the cancellation is performed.
975   /// \param CanceledDirective The kind of directive that is cancled.
976   /// \param ExitCB Extra code to be generated in the exit block.
977   void emitCancelationCheckImpl(Value *CancelFlag,
978                                 omp::Directive CanceledDirective,
979                                 FinalizeCallbackTy ExitCB = {});
980 
981   /// Generate a target region entry call.
982   ///
983   /// \param Loc The location at which the request originated and is fulfilled.
984   /// \param Return Return value of the created function returned by reference.
985   /// \param DeviceID Identifier for the device via the 'device' clause.
986   /// \param NumTeams Numer of teams for the region via the 'num_teams' clause
987   ///                 or 0 if unspecified and -1 if there is no 'teams' clause.
988   /// \param NumThreads Number of threads via the 'thread_limit' clause.
989   /// \param HostPtr Pointer to the host-side pointer of the target kernel.
990   /// \param KernelArgs Array of arguments to the kernel.
991   InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
992                                  Value *Ident, Value *DeviceID, Value *NumTeams,
993                                  Value *NumThreads, Value *HostPtr,
994                                  ArrayRef<Value *> KernelArgs);
995 
996   /// Generate a barrier runtime call.
997   ///
998   /// \param Loc The location at which the request originated and is fulfilled.
999   /// \param DK The directive which caused the barrier
1000   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1001   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1002   ///                        should be checked and acted upon.
1003   ///
1004   /// \returns The insertion point after the barrier.
1005   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1006                                 omp::Directive DK, bool ForceSimpleCall,
1007                                 bool CheckCancelFlag);
1008 
1009   /// Generate a flush runtime call.
1010   ///
1011   /// \param Loc The location at which the request originated and is fulfilled.
1012   void emitFlush(const LocationDescription &Loc);
1013 
1014   /// The finalization stack made up of finalize callbacks currently in-flight,
1015   /// wrapped into FinalizationInfo objects that reference also the finalization
1016   /// target block and the kind of cancellable directive.
1017   SmallVector<FinalizationInfo, 8> FinalizationStack;
1018 
1019   /// Return true if the last entry in the finalization stack is of kind \p DK
1020   /// and cancellable.
isLastFinalizationInfoCancellable(omp::Directive DK)1021   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1022     return !FinalizationStack.empty() &&
1023            FinalizationStack.back().IsCancellable &&
1024            FinalizationStack.back().DK == DK;
1025   }
1026 
1027   /// Generate a taskwait runtime call.
1028   ///
1029   /// \param Loc The location at which the request originated and is fulfilled.
1030   void emitTaskwaitImpl(const LocationDescription &Loc);
1031 
1032   /// Generate a taskyield runtime call.
1033   ///
1034   /// \param Loc The location at which the request originated and is fulfilled.
1035   void emitTaskyieldImpl(const LocationDescription &Loc);
1036 
1037   /// Return the current thread ID.
1038   ///
1039   /// \param Ident The ident (ident_t*) describing the query origin.
1040   Value *getOrCreateThreadID(Value *Ident);
1041 
1042   /// The OpenMPIRBuilder Configuration
1043   OpenMPIRBuilderConfig Config;
1044 
1045   /// The underlying LLVM-IR module
1046   Module &M;
1047 
1048   /// The LLVM-IR Builder used to create IR.
1049   IRBuilder<> Builder;
1050 
1051   /// Map to remember source location strings
1052   StringMap<Constant *> SrcLocStrMap;
1053 
1054   /// Map to remember existing ident_t*.
1055   DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
1056 
1057   /// Helper that contains information about regions we need to outline
1058   /// during finalization.
1059   struct OutlineInfo {
1060     using PostOutlineCBTy = std::function<void(Function &)>;
1061     PostOutlineCBTy PostOutlineCB;
1062     BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
1063     SmallVector<Value *, 2> ExcludeArgsFromAggregate;
1064 
1065     /// Collect all blocks in between EntryBB and ExitBB in both the given
1066     /// vector and set.
1067     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
1068                        SmallVectorImpl<BasicBlock *> &BlockVector);
1069 
1070     /// Return the function that contains the region to be outlined.
getFunctionOutlineInfo1071     Function *getFunction() const { return EntryBB->getParent(); }
1072   };
1073 
1074   /// Collection of regions that need to be outlined during finalization.
1075   SmallVector<OutlineInfo, 16> OutlineInfos;
1076 
1077   /// Collection of owned canonical loop objects that eventually need to be
1078   /// free'd.
1079   std::forward_list<CanonicalLoopInfo> LoopInfos;
1080 
1081   /// Add a new region that will be outlined later.
addOutlineInfo(OutlineInfo && OI)1082   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
1083 
1084   /// An ordered map of auto-generated variables to their unique names.
1085   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1086   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1087   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1088   /// variables.
1089   StringMap<Constant*, BumpPtrAllocator> InternalVars;
1090 
1091   /// Create the global variable holding the offload mappings information.
1092   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
1093                                         std::string VarName);
1094 
1095   /// Create the global variable holding the offload names information.
1096   GlobalVariable *
1097   createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
1098                         std::string VarName);
1099 
1100   struct MapperAllocas {
1101     AllocaInst *ArgsBase = nullptr;
1102     AllocaInst *Args = nullptr;
1103     AllocaInst *ArgSizes = nullptr;
1104   };
1105 
1106   /// Create the allocas instruction used in call to mapper functions.
1107   void createMapperAllocas(const LocationDescription &Loc,
1108                            InsertPointTy AllocaIP, unsigned NumOperands,
1109                            struct MapperAllocas &MapperAllocas);
1110 
1111   /// Create the call for the target mapper function.
1112   /// \param Loc The source location description.
1113   /// \param MapperFunc Function to be called.
1114   /// \param SrcLocInfo Source location information global.
1115   /// \param MaptypesArg The argument types.
1116   /// \param MapnamesArg The argument names.
1117   /// \param MapperAllocas The AllocaInst used for the call.
1118   /// \param DeviceID Device ID for the call.
1119   /// \param NumOperands Number of operands in the call.
1120   void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1121                       Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1122                       struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1123                       unsigned NumOperands);
1124 
1125   /// Container for the arguments used to pass data to the runtime library.
1126   struct TargetDataRTArgs {
TargetDataRTArgsTargetDataRTArgs1127     explicit TargetDataRTArgs() {}
1128     /// The array of base pointer passed to the runtime library.
1129     Value *BasePointersArray = nullptr;
1130     /// The array of section pointers passed to the runtime library.
1131     Value *PointersArray = nullptr;
1132     /// The array of sizes passed to the runtime library.
1133     Value *SizesArray = nullptr;
1134     /// The array of map types passed to the runtime library for the beginning
1135     /// of the region or for the entire region if there are no separate map
1136     /// types for the region end.
1137     Value *MapTypesArray = nullptr;
1138     /// The array of map types passed to the runtime library for the end of the
1139     /// region, or nullptr if there are no separate map types for the region
1140     /// end.
1141     Value *MapTypesArrayEnd = nullptr;
1142     /// The array of user-defined mappers passed to the runtime library.
1143     Value *MappersArray = nullptr;
1144     /// The array of original declaration names of mapped pointers sent to the
1145     /// runtime library for debugging
1146     Value *MapNamesArray = nullptr;
1147   };
1148 
1149   /// Struct that keeps the information that should be kept throughout
1150   /// a 'target data' region.
1151   class TargetDataInfo {
1152     /// Set to true if device pointer information have to be obtained.
1153     bool RequiresDevicePointerInfo = false;
1154     /// Set to true if Clang emits separate runtime calls for the beginning and
1155     /// end of the region.  These calls might have separate map type arrays.
1156     bool SeparateBeginEndCalls = false;
1157 
1158   public:
1159     TargetDataRTArgs RTArgs;
1160 
1161     /// Indicate whether any user-defined mapper exists.
1162     bool HasMapper = false;
1163     /// The total number of pointers passed to the runtime library.
1164     unsigned NumberOfPtrs = 0u;
1165 
TargetDataInfo()1166     explicit TargetDataInfo() {}
TargetDataInfo(bool RequiresDevicePointerInfo,bool SeparateBeginEndCalls)1167     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1168                             bool SeparateBeginEndCalls)
1169         : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1170           SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1171     /// Clear information about the data arrays.
clearArrayInfo()1172     void clearArrayInfo() {
1173       RTArgs = TargetDataRTArgs();
1174       HasMapper = false;
1175       NumberOfPtrs = 0u;
1176     }
1177     /// Return true if the current target data information has valid arrays.
isValid()1178     bool isValid() {
1179       return RTArgs.BasePointersArray && RTArgs.PointersArray &&
1180              RTArgs.SizesArray && RTArgs.MapTypesArray &&
1181              (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
1182     }
requiresDevicePointerInfo()1183     bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
separateBeginEndCalls()1184     bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1185   };
1186 
1187   /// Emit the arguments to be passed to the runtime library based on the
1188   /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
1189   /// ForEndCall, emit map types to be passed for the end of the region instead
1190   /// of the beginning.
1191   void emitOffloadingArraysArgument(IRBuilderBase &Builder,
1192                                     OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
1193                                     OpenMPIRBuilder::TargetDataInfo &Info,
1194                                     bool EmitDebug = false,
1195                                     bool ForEndCall = false);
1196 
1197   /// Creates offloading entry for the provided entry ID \a ID, address \a
1198   /// Addr, size \a Size, and flags \a Flags.
1199   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
1200                           int32_t Flags, GlobalValue::LinkageTypes);
1201 
1202   /// The kind of errors that can occur when emitting the offload entries and
1203   /// metadata.
1204   enum EmitMetadataErrorKind {
1205     EMIT_MD_TARGET_REGION_ERROR,
1206     EMIT_MD_DECLARE_TARGET_ERROR,
1207     EMIT_MD_GLOBAL_VAR_LINK_ERROR
1208   };
1209 
1210   /// Callback function type
1211   using EmitMetadataErrorReportFunctionTy =
1212       std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1213 
1214   // Emit the offloading entries and metadata so that the device codegen side
1215   // can easily figure out what to emit. The produced metadata looks like
1216   // this:
1217   //
1218   // !omp_offload.info = !{!1, ...}
1219   //
1220   // We only generate metadata for function that contain target regions.
1221   void createOffloadEntriesAndInfoMetadata(
1222       OffloadEntriesInfoManager &OffloadEntriesInfoManager,
1223       EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1224 
1225 public:
1226   /// Generator for __kmpc_copyprivate.
1227   ///
1228   /// \param Loc The source location description.
1229   /// \param BufSize Number of elements in the buffer.
1230   /// \param CpyBuf List of pointers to data to be copied.
1231   /// \param CpyFn Function to call for copying data.
1232   /// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
1233   ///
1234   /// \return The insertion position *after* the CopyPrivate call.
1235 
1236   InsertPointTy createCopyPrivate(const LocationDescription &Loc,
1237                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
1238                                   llvm::Value *CpyFn, llvm::Value *DidIt);
1239 
1240   /// Generator for '#omp single'.
1241   ///
1242   /// \param Loc The source location description.
1243   /// \param BodyGenCB Callback that will generate the region code.
1244   /// \param FiniCB Callback to finalize variable copies.
1245   /// \param IsNowait If false, a barrier is emitted.
1246   /// \param DidIt Local variable used as a flag to indicate 'single' thread.
1247   ///
1248   /// \returns The insertion position *after* the single call.
1249   InsertPointTy createSingle(const LocationDescription &Loc,
1250                              BodyGenCallbackTy BodyGenCB,
1251                              FinalizeCallbackTy FiniCB, bool IsNowait,
1252                              llvm::Value *DidIt);
1253 
1254   /// Generator for '#omp master'.
1255   ///
1256   /// \param Loc The insert and source location description.
1257   /// \param BodyGenCB Callback that will generate the region code.
1258   /// \param FiniCB Callback to finalize variable copies.
1259   ///
1260   /// \returns The insertion position *after* the master region.
1261   InsertPointTy createMaster(const LocationDescription &Loc,
1262                              BodyGenCallbackTy BodyGenCB,
1263                              FinalizeCallbackTy FiniCB);
1264 
1265   /// Generator for '#omp masked'.
1266   ///
1267   /// \param Loc The insert and source location description.
1268   /// \param BodyGenCB Callback that will generate the region code.
1269   /// \param FiniCB Callback to finalize variable copies.
1270   /// \param Filter Filter value (presumably the 'filter' clause operand; confirm).
1271   /// \returns The insertion position *after* the masked region.
1272   InsertPointTy createMasked(const LocationDescription &Loc,
1273                              BodyGenCallbackTy BodyGenCB,
1274                              FinalizeCallbackTy FiniCB, Value *Filter);
1275 
1276   /// Generator for '#omp critical'.
1277   ///
1278   /// \param Loc The insert and source location description.
1279   /// \param BodyGenCB Callback that will generate the region body code.
1280   /// \param FiniCB Callback to finalize variable copies.
1281   /// \param CriticalName Name of the lock used by the critical directive.
1282   /// \param HintInst Hint instruction for hint clause associated with critical.
1283   ///
1284   /// \returns The insertion position *after* the critical region.
1285   InsertPointTy createCritical(const LocationDescription &Loc,
1286                                BodyGenCallbackTy BodyGenCB,
1287                                FinalizeCallbackTy FiniCB,
1288                                StringRef CriticalName, Value *HintInst);
1289 
1290   /// Generator for '#omp ordered depend (source | sink)'.
1291   ///
1292   /// \param Loc The insert and source location description.
1293   /// \param AllocaIP The insertion point to be used for alloca instructions.
1294   /// \param NumLoops The number of loops in depend clause.
1295   /// \param StoreValues The values that will be stored in the vector address.
1296   /// \param Name The name of alloca instruction.
1297   /// \param IsDependSource If true, depend source; otherwise, depend sink.
1298   ///
1299   /// \return The insertion position *after* the ordered.
1300   InsertPointTy createOrderedDepend(const LocationDescription &Loc,
1301                                     InsertPointTy AllocaIP, unsigned NumLoops,
1302                                     ArrayRef<llvm::Value *> StoreValues,
1303                                     const Twine &Name, bool IsDependSource);
1304 
1305   /// Generator for '#omp ordered [threads | simd]'.
1306   ///
1307   /// \param Loc The insert and source location description.
1308   /// \param BodyGenCB Callback that will generate the region code.
1309   /// \param FiniCB Callback to finalize variable copies.
1310   /// \param IsThreads If true, with threads clause or without clause;
1311   /// otherwise, with simd clause.
1312   ///
1313   /// \returns The insertion position *after* the ordered.
1314   InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
1315                                          BodyGenCallbackTy BodyGenCB,
1316                                          FinalizeCallbackTy FiniCB,
1317                                          bool IsThreads);
1318 
1319   /// Generator for '#omp sections'.
1320   ///
1321   /// \param Loc The insert and source location description.
1322   /// \param AllocaIP The insertion points to be used for alloca instructions.
1323   /// \param SectionCBs Callbacks that will generate body of each section.
1324   /// \param PrivCB Callback to copy a given variable (think copy constructor).
1325   /// \param FiniCB Callback to finalize variable copies.
1326   /// \param IsCancellable Flag to indicate a cancellable parallel region.
1327   /// \param IsNowait If true, the barrier that ensures all sections are executed
1328   /// before moving forward will not be generated.
1329   /// \returns The insertion position *after* the sections.
1330   InsertPointTy createSections(const LocationDescription &Loc,
1331                                InsertPointTy AllocaIP,
1332                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
1333                                PrivatizeCallbackTy PrivCB,
1334                                FinalizeCallbackTy FiniCB, bool IsCancellable,
1335                                bool IsNowait);
1336 
1337   /// Generator for '#omp section'.
1338   ///
1339   /// \param Loc The insert and source location description.
1340   /// \param BodyGenCB Callback that will generate the region body code.
1341   /// \param FiniCB Callback to finalize variable copies.
1342   /// \returns The insertion position *after* the section.
1343   InsertPointTy createSection(const LocationDescription &Loc,
1344                               BodyGenCallbackTy BodyGenCB,
1345                               FinalizeCallbackTy FiniCB);
1346 
1347   /// Generate conditional branch and relevant BasicBlocks through which private
1348   /// threads copy the 'copyin' variables from Master copy to threadprivate
1349   /// copies.
1350   ///
1351   /// \param IP Insertion block for copyin conditional
1352   /// \param MasterAddr A pointer to the master variable
1353   /// \param PrivateAddr A pointer to the threadprivate variable
1354   /// \param IntPtrTy Pointer size type
1355   /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1356   ///        and copy.in.end block
1357   ///
1358   /// \returns The insertion point where the copying operation is to be emitted.
1359   InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
1360                                          Value *PrivateAddr,
1361                                          llvm::IntegerType *IntPtrTy,
1362                                          bool BranchtoEnd = true);
1363 
1364   /// Create a runtime call for kmpc_Alloc.
1365   ///
1366   /// \param Loc The insert and source location description.
1367   /// \param Size Size of allocated memory space.
1368   /// \param Allocator Allocator information instruction.
1369   /// \param Name Name of call Instruction for OMP_alloc (empty by default).
1370   ///
1371   /// \returns CallInst to the OMP_Alloc call
1372   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
1373                            Value *Allocator, std::string Name = "");
1374 
1375   /// Create a runtime call for kmpc_free.
1376   ///
1377   /// \param Loc The insert and source location description.
1378   /// \param Addr Address of memory space to be freed.
1379   /// \param Allocator Allocator information instruction.
1380   /// \param Name Name of call Instruction for OMP_Free (empty by default).
1381   ///
1382   /// \returns CallInst to the OMP_Free call
1383   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
1384                           Value *Allocator, std::string Name = "");
1385 
1386   /// Create a runtime call for kmpc_threadprivate_cached.
1387   ///
1388   /// \param Loc The insert and source location description.
1389   /// \param Pointer Pointer to data to be cached.
1390   /// \param Size Size of data to be cached.
1391   /// \param Name Name of the call instruction (empty by default).
1392   ///
1393   /// \returns CallInst to the thread private cache call.
1394   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
1395                                       llvm::Value *Pointer,
1396                                       llvm::ConstantInt *Size,
1397                                       const llvm::Twine &Name = Twine(""));
1398 
1399   /// Create a runtime call for __tgt_interop_init.
1400   ///
1401   /// \param Loc The insert and source location description.
1402   /// \param InteropVar variable to be allocated
1403   /// \param InteropType type of interop operation
1404   /// \param Device device to which offloading will occur
1405   /// \param NumDependences number of dependence variables
1406   /// \param DependenceAddress pointer to dependence variables
1407   /// \param HaveNowaitClause does nowait clause exist
1408   ///
1409   /// \returns CallInst to the __tgt_interop_init call
1410   CallInst *createOMPInteropInit(const LocationDescription &Loc,
1411                                  Value *InteropVar,
1412                                  omp::OMPInteropType InteropType, Value *Device,
1413                                  Value *NumDependences,
1414                                  Value *DependenceAddress,
1415                                  bool HaveNowaitClause);
1416 
1417   /// Create a runtime call for __tgt_interop_destroy.
1418   ///
1419   /// \param Loc The insert and source location description.
1420   /// \param InteropVar variable to be allocated
1421   /// \param Device device to which offloading will occur
1422   /// \param NumDependences number of dependence variables
1423   /// \param DependenceAddress pointer to dependence variables
1424   /// \param HaveNowaitClause does nowait clause exist
1425   ///
1426   /// \returns CallInst to the __tgt_interop_destroy call
1427   CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
1428                                     Value *InteropVar, Value *Device,
1429                                     Value *NumDependences,
1430                                     Value *DependenceAddress,
1431                                     bool HaveNowaitClause);
1432 
1433   /// Create a runtime call for __tgt_interop_use.
1434   ///
1435   /// \param Loc The insert and source location description.
1436   /// \param InteropVar variable to be allocated
1437   /// \param Device device to which offloading will occur
1438   /// \param NumDependences number of dependence variables
1439   /// \param DependenceAddress pointer to dependence variables
1440   /// \param HaveNowaitClause does nowait clause exist
1441   ///
1442   /// \returns CallInst to the __tgt_interop_use call
1443   CallInst *createOMPInteropUse(const LocationDescription &Loc,
1444                                 Value *InteropVar, Value *Device,
1445                                 Value *NumDependences, Value *DependenceAddress,
1446                                 bool HaveNowaitClause);
1447 
1448   /// The `omp target` interface
1449   ///
1450   /// For more information about the usage of this interface,
1451   /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
1452   ///
1453   ///{
1454 
1455   /// Create a runtime call for kmpc_target_init.
1456   ///
1457   /// \param Loc The insert and source location description.
1458   /// \param IsSPMD Flag indicating whether the kernel is an SPMD kernel.
1459   InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
1460 
1461   /// Create a runtime call for kmpc_target_deinit.
1462   ///
1463   /// \param Loc The insert and source location description.
1464   /// \param IsSPMD Flag indicating whether the kernel is an SPMD kernel.
1465   void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
1466 
1467   ///}
1468 
1469 private:
1470   // Sets the function attributes expected for the outlined function.
1471   void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
1472                                                  int32_t NumTeams,
1473                                                  int32_t NumThreads);
1474 
1475   // Creates the function ID/Address for the given outlined function.
1476   // In the case of an embedded device function, the address of the function is
1477   // used; in the case of a non-offload function, a constant is created.
1478   Constant *createOutlinedFunctionID(Function *OutlinedFn,
1479                                      StringRef EntryFnIDName);
1480 
1481   // Creates the region entry address for the outlined function.
1482   Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
1483                                         StringRef EntryFnName);
1484 
1485 public:
1486   /// Callback type used to generate a function with the given name.
1487   using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
1488 
1489   /// Create a unique name for the entry function using the source location
1490   /// information of the current target region. The name will be something like:
1491   ///
1492   /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
1493   ///
1494   /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
1495   /// mangled name of the function that encloses the target region and BB is the
1496   /// line number of the target region. CC is a count added when more than one
1497   /// region is located at the same location.
1498   ///
1499   /// If this target outline function is not an offload entry, we don't need to
1500   /// register it. This may happen if it is guarded by an if clause that is
1501   /// false at compile time, or no target archs have been specified.
1502   ///
1503   /// The created target region ID is used by the runtime library to identify
1504   /// the current target region, so it only has to be unique and not
1505   /// necessarily point to anything. It could be the pointer to the outlined
1506   /// function that implements the target region, but we aren't using that so
1507   /// that the compiler doesn't need to keep that, and could therefore inline
1508   /// the host function if proven worthwhile during optimization. On the other
1509   /// hand, if emitting code for the device, the ID has to be the function
1510   /// address so that it can be retrieved from the offloading entry and launched
1511   /// by the runtime library. We also mark the outlined function to have
1512   /// external linkage in case we are emitting code for the device, because
1513   /// these functions will be entry points to the device.
1514   /// \param InfoManager The info manager keeping track of the offload entries
1515   /// \param EntryInfo The entry information about the function
1516   /// \param GenerateFunctionCallback The callback function to generate the code
1517   /// \param NumTeams Number default teams
1518   /// \param NumThreads Number default threads
1519   /// \param IsOffloadEntry Whether the function is an offload entry (see above)
1520   /// \param OutlinedFn Reference to the pointer to the outlined function
1521   /// \param OutlinedFnID Reference to the ID constant to be created
1522   void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
1523                                 TargetRegionEntryInfo &EntryInfo,
1524                                 FunctionGenCallback &GenerateFunctionCallback,
1525                                 int32_t NumTeams, int32_t NumThreads,
1526                                 bool IsOffloadEntry, Function *&OutlinedFn,
1527                                 Constant *&OutlinedFnID);
1528 
1529   /// Registers the given function and sets up the attributes of the function.
1530   /// Returns the FunctionID.
1531   ///
1532   /// \param InfoManager The info manager keeping track of the offload entries
1533   /// \param EntryInfo The entry information about the function
1534   /// \param OutlinedFunction Pointer to the outlined function
1535   /// \param EntryFnName Name of the outlined function
1536   /// \param EntryFnIDName Name of the ID to be created
1537   /// \param NumTeams Number default teams
1538   /// \param NumThreads Number default threads
1539   Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
1540                                          TargetRegionEntryInfo &EntryInfo,
1541                                          Function *OutlinedFunction,
1542                                          StringRef EntryFnName,
1543                                          StringRef EntryFnIDName,
1544                                          int32_t NumTeams, int32_t NumThreads);
1545 
1546   /// Declarations for LLVM-IR types (simple, array, function and structure) are
1547   /// generated below. Their names are defined and used in OMPKinds.def. Here
1548   /// we provide the declarations, the initializeTypes function will provide the
1549   /// values. Every generated member is initialized to nullptr.
1550   ///
1551   ///{
1552 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
1553 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
1554   ArrayType *VarName##Ty = nullptr;                                            \
1555   PointerType *VarName##PtrTy = nullptr;
1556 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
1557   FunctionType *VarName = nullptr;                                             \
1558   PointerType *VarName##Ptr = nullptr;
1559 #define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
1560   StructType *VarName = nullptr;                                               \
1561   PointerType *VarName##Ptr = nullptr;
1562 #include "llvm/Frontend/OpenMP/OMPKinds.def"
1563 
1564   ///}
1565 
1566 private:
1567   /// Create all simple and struct types exposed by the runtime and remember
1568   /// their llvm::PointerTypes for easy access later.
1569   void initializeTypes(Module &M);
1570 
1571   /// Common interface for generating entry calls for OMP Directives.
1572   /// If the directive has a region/body, it will set the insertion
1573   /// point to the body.
1574   ///
1575   /// \param OMPD Directive to generate entry blocks for
1576   /// \param EntryCall Call to the entry OMP Runtime Function
1577   /// \param ExitBB block where the region ends.
1578   /// \param Conditional indicate if the entry call result will be used
1579   ///        to evaluate a conditional of whether a thread will execute
1580   ///        body code or not (false by default).
1581   ///
1582   /// \return The insertion position in exit block
1583   InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
1584                                          BasicBlock *ExitBB,
1585                                          bool Conditional = false);
1586 
1587   /// Common interface to finalize the region.
1588   ///
1589   /// \param OMPD Directive to generate exiting code for
1590   /// \param FinIP Insertion point for emitting Finalization code and exit call
1591   /// \param ExitCall Call to the ending OMP Runtime Function
1592   /// \param HasFinalize indicate if the directive will require finalization
1593   ///        and has a finalization callback in the stack that
1594   ///        should be called (true by default).
1595   ///
1596   /// \return The insertion position in exit block
1597   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
1598                                         InsertPointTy FinIP,
1599                                         Instruction *ExitCall,
1600                                         bool HasFinalize = true);
1601 
1602   /// Common interface to generate OMP inlined regions.
1603   ///
1604   /// \param OMPD Directive to generate inlined region for
1605   /// \param EntryCall Call to the entry OMP Runtime Function
1606   /// \param ExitCall Call to the ending OMP Runtime Function
1607   /// \param BodyGenCB Body code generation callback.
1608   /// \param FiniCB Finalization Callback. Will be called when finalizing region
1609   /// \param Conditional indicate if the entry call result will be used
1610   ///        to evaluate a conditional of whether a thread will execute
1611   ///        body code or not.
1612   /// \param HasFinalize indicate if the directive will require finalization
1613   ///        and has a finalization callback in the stack that
1614   ///        should be called.
1615   /// \param IsCancellable if HasFinalize is set to true, indicate if
1616   ///        the directive should be cancellable.
1617   /// \return The insertion point after the region
1618 
1619   InsertPointTy
1620   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
1621                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
1622                        FinalizeCallbackTy FiniCB, bool Conditional = false,
1623                        bool HasFinalize = true, bool IsCancellable = false);
1624 
1625   /// Get a name built from \p Parts using platform-specific name separators.
1626   /// \param Parts different parts of the final name that need separation
1627   /// \param FirstSeparator First separator used between the initial two
1628   ///        parts of the name.
1629   /// \param Separator separator used between all of the remaining consecutive
1630   ///        parts of the name
1631   static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
1632                                            StringRef FirstSeparator,
1633                                            StringRef Separator);
1634 
1635   /// Returns the corresponding lock object for the specified critical region
1636   /// name. If the lock object does not exist, it is created; otherwise the
1637   /// reference to the existing copy is returned.
1638   /// \param CriticalName Name of the critical region.
1639   ///
1640   Value *getOMPCriticalRegionLock(StringRef CriticalName);
1641 
1642   /// Callback type for Atomic Expression update.
1643   /// Example:
1644   /// \code{.cpp}
1645   /// unsigned x = 0;
1646   /// #pragma omp atomic update
1647   /// x = Expr(x_old);  //Expr() is any legal operation
1648   /// \endcode
1649   ///
1650   /// \param XOld The value of the atomic memory address to use for update
1651   /// \param IRB Reference to the IRBuilder to use
1652   ///
1653   /// \returns Value to update X to.
1654   using AtomicUpdateCallbackTy =
1655       const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
1656 
1657 private:
1658   enum AtomicKind { Read, Write, Update, Capture, Compare }; // OpenMP atomic operation kinds.
1659 
1660   /// Determine whether to emit flush or not.
1661   ///
1662   /// \param Loc    The insert and source location description.
1663   /// \param AO     The required atomic ordering
1664   /// \param AK     The OpenMP atomic operation kind used.
1665   ///
1666   /// \returns Whether a flush was emitted or not.
1667   bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
1668                                     AtomicOrdering AO, AtomicKind AK);
1669 
1670   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
1671   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
1672   /// Only Scalar data types.
1673   ///
1674   /// \param AllocaIP   The insertion point to be used for alloca
1675   ///                   instructions.
1676   /// \param X          The target atomic pointer to be updated
1677   /// \param XElemTy    The element type of the atomic pointer.
1678   /// \param Expr       The value to update X with.
1679   /// \param AO         Atomic ordering of the generated atomic
1680   ///                   instructions.
1681   /// \param RMWOp      The binary operation used for update. If the
1682   ///                   operation is not supported by atomicRMW, or
1683   ///                   belongs to {FADD, FSUB, BAD_BINOP}, then a
1684   ///                   `cmpExch` based atomic will be generated.
1685   /// \param UpdateOp   Code generator for complex expressions that cannot be
1686   ///                   expressed through atomicrmw instruction.
1687   /// \param VolatileX  true if \a X is volatile.
1688   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
1689   ///                     update expression, false otherwise.
1690   ///                     (e.g. true for X = X BinOp Expr)
1691   ///
1692   /// \returns A pair of the old value of X before the update, and the value
1693   ///          used for the update.
1694   std::pair<Value *, Value *>
1695   emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
1696                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
1697                    AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
1698                    bool IsXBinopExpr);
1699 
1700   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
1701   ///
1702   /// \return The instruction
1703   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
1704                                 AtomicRMWInst::BinOp RMWOp);
1705 
1706 public:
1707   /// A struct to pack relevant information while generating atomic Ops.
1708   struct AtomicOpValue {
1709     Value *Var = nullptr; // The operand; presumably a pointer to the variable (confirm).
1710     Type *ElemTy = nullptr; // Element type; presumably the type Var points to (confirm).
1711     bool IsSigned = false; // Signedness flag, false by default.
1712     bool IsVolatile = false; // Volatility flag, false by default.
1713   };
1714 
1715   /// Emit atomic Read for : V = X --- Only Scalar data types.
1716   ///
1717   /// \param Loc    The insert and source location description.
1718   /// \param X      The target pointer to be atomically read
1719   /// \param V      Memory address where to store atomically read
1720   ///               value
1721   /// \param AO     Atomic ordering of the generated atomic
1722   ///               instructions.
1723   ///
1724   /// \return Insertion point after generated atomic read IR.
1725   InsertPointTy createAtomicRead(const LocationDescription &Loc,
1726                                  AtomicOpValue &X, AtomicOpValue &V,
1727                                  AtomicOrdering AO);
1728 
1729   /// Emit atomic write for : X = Expr --- Only Scalar data types.
1730   ///
1731   /// \param Loc    The insert and source location description.
1732   /// \param X      The target pointer to be atomically written to
1733   /// \param Expr   The value to store.
1734   /// \param AO     Atomic ordering of the generated atomic
1735   ///               instructions.
1736   ///
1737   /// \return Insertion point after generated atomic Write IR.
1738   InsertPointTy createAtomicWrite(const LocationDescription &Loc,
1739                                   AtomicOpValue &X, Value *Expr,
1740                                   AtomicOrdering AO);
1741 
1742   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
1743   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
1744   /// Only Scalar data types.
1745   ///
1746   /// \param Loc      The insert and source location description.
1747   /// \param AllocaIP The insertion point to be used for alloca instructions.
1748   /// \param X        The target atomic pointer to be updated
1749   /// \param Expr     The value to update X with.
1750   /// \param AO       Atomic ordering of the generated atomic instructions.
1751   /// \param RMWOp    The binary operation used for update. If the operation
1752   ///                 is not supported by atomicRMW, or belongs to
1753   ///                 {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
1754   ///                 atomic will be generated.
1755   /// \param UpdateOp Code generator for complex expressions that cannot be
1756   ///                 expressed through atomicrmw instruction.
1757   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
1758   ///                     update expression, false otherwise.
1759   ///                     (e.g. true for X = X BinOp Expr)
1760   ///
1761   /// \return Insertion point after generated atomic update IR.
1762   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
1763                                    InsertPointTy AllocaIP, AtomicOpValue &X,
1764                                    Value *Expr, AtomicOrdering AO,
1765                                    AtomicRMWInst::BinOp RMWOp,
1766                                    AtomicUpdateCallbackTy &UpdateOp,
1767                                    bool IsXBinopExpr);
1768 
1769   /// Emit atomic update for constructs: --- Only Scalar data types
1770   /// V = X; X = X BinOp Expr ,
1771   /// X = X BinOp Expr; V = X,
1772   /// V = X; X = Expr BinOp X,
1773   /// X = Expr BinOp X; V = X,
1774   /// V = X; X = UpdateOp(X),
1775   /// X = UpdateOp(X); V = X,
1776   ///
1777   /// \param Loc        The insert and source location description.
1778   /// \param AllocaIP   The insertion point to be used for alloca instructions.
1779   /// \param X          The target atomic pointer to be updated
1780   /// \param V          Memory address where to store captured value
1781   /// \param Expr       The value to update X with.
1782   /// \param AO         Atomic ordering of the generated atomic instructions
1783   /// \param RMWOp      The binary operation used for update. If the
1784   ///                   operation is not supported by atomicRMW, or belongs to
1785   ///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
1786   ///                   atomic will be generated.
1787   /// \param UpdateOp   Code generator for complex expressions that cannot be
1788   ///                   expressed through atomicrmw instruction.
1789   /// \param UpdateExpr true if X is an in place update of the form
1790   ///                   X = X BinOp Expr or X = Expr BinOp X
1791   /// \param IsPostfixUpdate true if original value of 'x' must be stored in
1792   ///                        'v', not an updated one.
1793   /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
1794   ///                     update expression, false otherwise.
1795   ///                     (e.g. true for X = X BinOp Expr)
1796   ///
1797   /// \return Insertion point after generated atomic capture IR.
1798   InsertPointTy
1799   createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
1800                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
1801                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
1802                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
1803                       bool IsPostfixUpdate, bool IsXBinopExpr);
1804 
1805   /// Emit atomic compare for constructs: --- Only scalar data types
1806   /// cond-expr-stmt:
1807   /// x = x ordop expr ? expr : x;
1808   /// x = expr ordop x ? expr : x;
1809   /// x = x == e ? d : x;
1810   /// x = e == x ? d : x; (this one is not in the spec)
1811   /// cond-update-stmt:
1812   /// if (x ordop expr) { x = expr; }
1813   /// if (expr ordop x) { x = expr; }
1814   /// if (x == e) { x = d; }
1815   /// if (e == x) { x = d; } (this one is not in the spec)
1816   /// conditional-update-capture-atomic:
1817   /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
1818   /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
1819   /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
1820   ///                                         IsFailOnly=true)
1821   /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
1822   /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
1823   ///                                                IsFailOnly=true)
1824   ///
1825   /// \param Loc          The insert and source location description.
1826   /// \param X            The target atomic pointer to be updated.
1827   /// \param V            Memory address where to store captured value (for
1828   ///                     compare capture only).
1829   /// \param R            Memory address where to store comparison result
1830   ///                     (for compare capture with '==' only).
1831   /// \param E            The expected value ('e') for forms that use an
1832   ///                     equality comparison or an expression ('expr') for
1833   ///                     forms that use 'ordop' (logically an atomic maximum or
1834   ///                     minimum).
1835   /// \param D            The desired value for forms that use an equality
1836   ///                     comparison. For forms that use 'ordop', it should be
1837   ///                     \p nullptr.
1838   /// \param AO           Atomic ordering of the generated atomic instructions.
1839   /// \param Op           Atomic compare operation. It can only be ==, <, or >.
1840   /// \param IsXBinopExpr True if the conditional statement is in the form where
1841   ///                     x is on LHS. It only matters for < or >.
1842   /// \param IsPostfixUpdate  True if original value of 'x' must be stored in
1843   ///                         'v', not an updated one (for compare capture
1844   ///                         only).
1845   /// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
1846   ///                     only when the comparison fails. This is only valid for
1847   ///                     the case the comparison is '=='.
1848   ///
1849   /// \return Insertion point after generated atomic compare IR.
1850   InsertPointTy
1851   createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
1852                       AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
1853                       AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
1854                       bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
1855 
1856   /// Create the control flow structure of a canonical OpenMP loop.
1857   ///
1858   /// The emitted loop will be disconnected, i.e. no edge to the loop's
1859   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
1860   /// IRBuilder location is not preserved.
1861   ///
1862   /// \param DL        DebugLoc used for the instructions in the skeleton.
1863   /// \param TripCount Value to be used for the trip count.
1864   /// \param F         Function in which to insert the BasicBlocks.
1865   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
1866   ///                         typically the body itself.
1867   /// \param PostInsertBefore Where to insert BBs that execute after the body.
1868   /// \param Name      Base name used to derive BB
1869   ///                  and instruction names.
1870   ///
1871   /// \returns The CanonicalLoopInfo that represents the emitted loop.
1872   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
1873                                         Function *F,
1874                                         BasicBlock *PreInsertBefore,
1875                                         BasicBlock *PostInsertBefore,
1876                                         const Twine &Name = {});
1877   /// OMP Offload Info Metadata name string
1878   const std::string ompOffloadInfoName = "omp_offload.info";
1879 
1880   /// Loads all the offload entries information from the host IR
1881   /// metadata. This function is only meant to be used with device code
1882   /// generation.
1883   ///
1884   /// \param M         Module to load Metadata info from. Module passed maybe
1885   /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module.
1886   /// \param OffloadEntriesInfoManager Initialize Offload Entry information.
1887   void
1888   loadOffloadInfoMetadata(Module &M,
1889                           OffloadEntriesInfoManager &OffloadEntriesInfoManager);
1890 
1891   /// Gets (if variable with the given name already exist) or creates
1892   /// internal global variable with the specified Name. The created variable has
1893   /// linkage CommonLinkage by default and is initialized by null value.
1894   /// \param Ty Type of the global variable. If it is exist already the type
1895   /// must be the same.
1896   /// \param Name Name of the variable.
1897   GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
1898                                               unsigned AddressSpace = 0);
1899 };
1900 
1901 /// Data structure to contain the information needed to uniquely identify
1902 /// a target entry.
1903 struct TargetRegionEntryInfo {
1904   std::string ParentName;
1905   unsigned DeviceID;
1906   unsigned FileID;
1907   unsigned Line;
1908   unsigned Count;
1909 
TargetRegionEntryInfoTargetRegionEntryInfo1910   TargetRegionEntryInfo()
1911       : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {}
1912   TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
1913                         unsigned FileID, unsigned Line, unsigned Count = 0)
ParentNameTargetRegionEntryInfo1914       : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
1915         Count(Count) {}
1916 
1917   static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
1918                                          StringRef ParentName,
1919                                          unsigned DeviceID, unsigned FileID,
1920                                          unsigned Line, unsigned Count);
1921 
1922   bool operator<(const TargetRegionEntryInfo RHS) const {
1923     return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
1924            std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
1925                            RHS.Count);
1926   }
1927 };
1928 
1929 /// Class that manages information about offload code regions and data
1930 class OffloadEntriesInfoManager {
1931   /// Number of entries registered so far.
1932   OpenMPIRBuilderConfig Config;
1933   unsigned OffloadingEntriesNum = 0;
1934 
1935 public:
setConfig(OpenMPIRBuilderConfig C)1936   void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
1937 
1938   /// Base class of the entries info.
1939   class OffloadEntryInfo {
1940   public:
1941     /// Kind of a given entry.
1942     enum OffloadingEntryInfoKinds : unsigned {
1943       /// Entry is a target region.
1944       OffloadingEntryInfoTargetRegion = 0,
1945       /// Entry is a declare target variable.
1946       OffloadingEntryInfoDeviceGlobalVar = 1,
1947       /// Invalid entry info.
1948       OffloadingEntryInfoInvalid = ~0u
1949     };
1950 
1951   protected:
1952     OffloadEntryInfo() = delete;
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)1953     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
OffloadEntryInfo(OffloadingEntryInfoKinds Kind,unsigned Order,uint32_t Flags)1954     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
1955                               uint32_t Flags)
1956         : Flags(Flags), Order(Order), Kind(Kind) {}
1957     ~OffloadEntryInfo() = default;
1958 
1959   public:
isValid()1960     bool isValid() const { return Order != ~0u; }
getOrder()1961     unsigned getOrder() const { return Order; }
getKind()1962     OffloadingEntryInfoKinds getKind() const { return Kind; }
getFlags()1963     uint32_t getFlags() const { return Flags; }
setFlags(uint32_t NewFlags)1964     void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
getAddress()1965     Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
setAddress(Constant * V)1966     void setAddress(Constant *V) {
1967       assert(!Addr.pointsToAliveValue() && "Address has been set before!");
1968       Addr = V;
1969     }
classof(const OffloadEntryInfo * Info)1970     static bool classof(const OffloadEntryInfo *Info) { return true; }
1971 
1972   private:
1973     /// Address of the entity that has to be mapped for offloading.
1974     WeakTrackingVH Addr;
1975 
1976     /// Flags associated with the device global.
1977     uint32_t Flags = 0u;
1978 
1979     /// Order this entry was emitted.
1980     unsigned Order = ~0u;
1981 
1982     OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
1983   };
1984 
1985   /// Return true if a there are no entries defined.
1986   bool empty() const;
1987   /// Return number of entries defined so far.
size()1988   unsigned size() const { return OffloadingEntriesNum; }
1989 
OffloadEntriesInfoManager()1990   OffloadEntriesInfoManager() : Config() {}
1991 
1992   //
1993   // Target region entries related.
1994   //
1995 
1996   /// Kind of the target registry entry.
1997   enum OMPTargetRegionEntryKind : uint32_t {
1998     /// Mark the entry as target region.
1999     OMPTargetRegionEntryTargetRegion = 0x0,
2000     /// Mark the entry as a global constructor.
2001     OMPTargetRegionEntryCtor = 0x02,
2002     /// Mark the entry as a global destructor.
2003     OMPTargetRegionEntryDtor = 0x04,
2004   };
2005 
2006   /// Target region entries info.
2007   class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
2008     /// Address that can be used as the ID of the entry.
2009     Constant *ID = nullptr;
2010 
2011   public:
OffloadEntryInfoTargetRegion()2012     OffloadEntryInfoTargetRegion()
2013         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
OffloadEntryInfoTargetRegion(unsigned Order,Constant * Addr,Constant * ID,OMPTargetRegionEntryKind Flags)2014     explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
2015                                           Constant *ID,
2016                                           OMPTargetRegionEntryKind Flags)
2017         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
2018           ID(ID) {
2019       setAddress(Addr);
2020     }
2021 
getID()2022     Constant *getID() const { return ID; }
setID(Constant * V)2023     void setID(Constant *V) {
2024       assert(!ID && "ID has been set before!");
2025       ID = V;
2026     }
classof(const OffloadEntryInfo * Info)2027     static bool classof(const OffloadEntryInfo *Info) {
2028       return Info->getKind() == OffloadingEntryInfoTargetRegion;
2029     }
2030   };
2031 
2032   /// Initialize target region entry.
2033   /// This is ONLY needed for DEVICE compilation.
2034   void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
2035                                        unsigned Order);
2036   /// Register target region entry.
2037   void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
2038                                      Constant *Addr, Constant *ID,
2039                                      OMPTargetRegionEntryKind Flags);
2040   /// Return true if a target region entry with the provided information
2041   /// exists.
2042   bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
2043                                 bool IgnoreAddressId = false) const;
2044 
2045   // Return the Name based on \a EntryInfo using the next available Count.
2046   void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
2047                                   const TargetRegionEntryInfo &EntryInfo);
2048 
2049   /// brief Applies action \a Action on all registered entries.
2050   typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
2051                             const OffloadEntryInfoTargetRegion &)>
2052       OffloadTargetRegionEntryInfoActTy;
2053   void
2054   actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
2055 
2056   //
2057   // Device global variable entries related.
2058   //
2059 
2060   /// Kind of the global variable entry..
2061   enum OMPTargetGlobalVarEntryKind : uint32_t {
2062     /// Mark the entry as a to declare target.
2063     OMPTargetGlobalVarEntryTo = 0x0,
2064     /// Mark the entry as a to declare target link.
2065     OMPTargetGlobalVarEntryLink = 0x1,
2066   };
2067 
2068   /// Device global variable entries info.
2069   class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
2070     /// Type of the global variable.
2071     int64_t VarSize;
2072     GlobalValue::LinkageTypes Linkage;
2073 
2074   public:
OffloadEntryInfoDeviceGlobalVar()2075     OffloadEntryInfoDeviceGlobalVar()
2076         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
OffloadEntryInfoDeviceGlobalVar(unsigned Order,OMPTargetGlobalVarEntryKind Flags)2077     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
2078                                              OMPTargetGlobalVarEntryKind Flags)
2079         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
OffloadEntryInfoDeviceGlobalVar(unsigned Order,Constant * Addr,int64_t VarSize,OMPTargetGlobalVarEntryKind Flags,GlobalValue::LinkageTypes Linkage)2080     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
2081                                              int64_t VarSize,
2082                                              OMPTargetGlobalVarEntryKind Flags,
2083                                              GlobalValue::LinkageTypes Linkage)
2084         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
2085           VarSize(VarSize), Linkage(Linkage) {
2086       setAddress(Addr);
2087     }
2088 
getVarSize()2089     int64_t getVarSize() const { return VarSize; }
setVarSize(int64_t Size)2090     void setVarSize(int64_t Size) { VarSize = Size; }
getLinkage()2091     GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
setLinkage(GlobalValue::LinkageTypes LT)2092     void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
classof(const OffloadEntryInfo * Info)2093     static bool classof(const OffloadEntryInfo *Info) {
2094       return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
2095     }
2096   };
2097 
2098   /// Initialize device global variable entry.
2099   /// This is ONLY used for DEVICE compilation.
2100   void initializeDeviceGlobalVarEntryInfo(StringRef Name,
2101                                           OMPTargetGlobalVarEntryKind Flags,
2102                                           unsigned Order);
2103 
2104   /// Register device global variable entry.
2105   void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
2106                                         int64_t VarSize,
2107                                         OMPTargetGlobalVarEntryKind Flags,
2108                                         GlobalValue::LinkageTypes Linkage);
2109   /// Checks if the variable with the given name has been registered already.
hasDeviceGlobalVarEntryInfo(StringRef VarName)2110   bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
2111     return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
2112   }
2113   /// Applies action \a Action on all registered entries.
2114   typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
2115       OffloadDeviceGlobalVarEntryInfoActTy;
2116   void actOnDeviceGlobalVarEntriesInfo(
2117       const OffloadDeviceGlobalVarEntryInfoActTy &Action);
2118 
2119 private:
2120   /// Return the count of entries at a particular source location.
2121   unsigned
2122   getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
2123 
2124   /// Update the count of entries at a particular source location.
2125   void
2126   incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
2127 
2128   static TargetRegionEntryInfo
getTargetRegionEntryCountKey(const TargetRegionEntryInfo & EntryInfo)2129   getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
2130     return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
2131                                  EntryInfo.FileID, EntryInfo.Line, 0);
2132   }
2133 
2134   // Count of entries at a location.
2135   std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
2136 
2137   // Storage for target region entries kind.
2138   typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
2139       OffloadEntriesTargetRegionTy;
2140   OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
2141   /// Storage for device global variable entries kind. The storage is to be
2142   /// indexed by mangled name.
2143   typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
2144       OffloadEntriesDeviceGlobalVarTy;
2145   OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
2146 };
2147 
/// Class to represent the control flow structure of an OpenMP canonical loop.
2149 ///
2150 /// The control-flow structure is standardized for easy consumption by
2151 /// directives associated with loops. For instance, the worksharing-loop
2152 /// construct may change this control flow such that each loop iteration is
2153 /// executed on only one thread. The constraints of a canonical loop in brief
2154 /// are:
2155 ///
2156 ///  * The number of loop iterations must have been computed before entering the
2157 ///    loop.
2158 ///
2159 ///  * Has an (unsigned) logical induction variable that starts at zero and
2160 ///    increments by one.
2161 ///
2162 ///  * The loop's CFG itself has no side-effects. The OpenMP specification
2163 ///    itself allows side-effects, but the order in which they happen, including
2164 ///    how often or whether at all, is unspecified. We expect that the frontend
2165 ///    will emit those side-effect instructions somewhere (e.g. before the loop)
2166 ///    such that the CanonicalLoopInfo itself can be side-effect free.
2167 ///
2168 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
/// execution of a loop body that satisfies these constraints. It does NOT
2170 /// represent arbitrary SESE regions that happen to contain a loop. Do not use
2171 /// CanonicalLoopInfo for such purposes.
2172 ///
2173 /// The control flow can be described as follows:
2174 ///
2175 ///     Preheader
2176 ///        |
2177 ///  /-> Header
2178 ///  |     |
2179 ///  |    Cond---\
2180 ///  |     |     |
2181 ///  |    Body   |
2182 ///  |    | |    |
2183 ///  |   <...>   |
2184 ///  |    | |    |
2185 ///   \--Latch   |
2186 ///              |
2187 ///             Exit
2188 ///              |
2189 ///            After
2190 ///
2191 /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2192 /// including) and end at AfterIP (at the After's first instruction, excluding).
2193 /// That is, instructions in the Preheader and After blocks (except the
2194 /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2195 /// side-effects. Typically, the Preheader is used to compute the loop's trip
2196 /// count. The instructions from BodyIP (at the Body block's first instruction,
2197 /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2198 /// control and thus can have side-effects. The body block is the single entry
2199 /// point into the loop body, which may contain arbitrary control flow as long
2200 /// as all control paths eventually branch to the Latch block.
2201 ///
2202 /// TODO: Consider adding another standardized BasicBlock between Body CFG and
2203 /// Latch to guarantee that there is only a single edge to the latch. It would
/// make loop transformations easier by not needing to consider multiple
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2207 /// executes after each body iteration.
2208 ///
/// There must be no loop-carried dependencies through llvm::Values. This is
/// equivalent to requiring that the Latch has no PHINode and that the Header's
/// only PHINode is for the induction variable.
2212 ///
2213 /// All code in Header, Cond, Latch and Exit (plus the terminator of the
2214 /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2215 /// by assertOK(). They are expected to not be modified unless explicitly
/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2217 /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2218 /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2219 /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2220 /// anymore as its underlying control flow may not exist anymore.
2221 /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2222 /// may also return a new CanonicalLoopInfo that can be passed to other
2223 /// loop-associated construct implementing methods. These loop-transforming
2224 /// methods may either create a new CanonicalLoopInfo usually using
2225 /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2226 /// modify one of the input CanonicalLoopInfo and return it as representing the
2227 /// modified loop. What is done is an implementation detail of
2228 /// transformation-implementing method and callers should always assume that the
2229 /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2230 /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2231 /// created by createCanonicalLoop, such that transforming methods do not have
2232 /// to special case where the CanonicalLoopInfo originated from.
2233 ///
2234 /// Generally, methods consuming CanonicalLoopInfo do not need an
2235 /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2236 /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2237 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2238 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
/// any InsertPoint in the Preheader, After or Body block can still be used
/// after calling such a method.
2241 ///
2242 /// TODO: Provide mechanisms for exception handling and cancellation points.
2243 ///
2244 /// Defined outside OpenMPIRBuilder because nested classes cannot be
2245 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2246 class CanonicalLoopInfo {
2247   friend class OpenMPIRBuilder;
2248 
2249 private:
2250   BasicBlock *Header = nullptr;
2251   BasicBlock *Cond = nullptr;
2252   BasicBlock *Latch = nullptr;
2253   BasicBlock *Exit = nullptr;
2254 
2255   /// Add the control blocks of this loop to \p BBs.
2256   ///
2257   /// This does not include any block from the body, including the one returned
2258   /// by getBody().
2259   ///
2260   /// FIXME: This currently includes the Preheader and After blocks even though
2261   /// their content is (mostly) not under CanonicalLoopInfo's control.
2262   /// Re-evaluated whether this makes sense.
2263   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2264 
2265   /// Sets the number of loop iterations to the given value. This value must be
2266   /// valid in the condition block (i.e., defined in the preheader) and is
2267   /// interpreted as an unsigned integer.
2268   void setTripCount(Value *TripCount);
2269 
2270   /// Replace all uses of the canonical induction variable in the loop body with
2271   /// a new one.
2272   ///
2273   /// The intended use case is to update the induction variable for an updated
2274   /// iteration space such that it can stay normalized in the 0...tripcount-1
2275   /// range.
2276   ///
2277   /// The \p Updater is called with the (presumable updated) current normalized
2278   /// induction variable and is expected to return the value that uses of the
2279   /// pre-updated induction values should use instead, typically dependent on
2280   /// the new induction variable. This is a lambda (instead of e.g. just passing
2281   /// the new value) to be able to distinguish the uses of the pre-updated
2282   /// induction variable and uses of the induction varible to compute the
2283   /// updated induction variable value.
2284   void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2285 
2286 public:
2287   /// Returns whether this object currently represents the IR of a loop. If
2288   /// returning false, it may have been consumed by a loop transformation or not
2289   /// been intialized. Do not use in this case;
isValid()2290   bool isValid() const { return Header; }
2291 
2292   /// The preheader ensures that there is only a single edge entering the loop.
2293   /// Code that must be execute before any loop iteration can be emitted here,
2294   /// such as computing the loop trip count and begin lifetime markers. Code in
2295   /// the preheader is not considered part of the canonical loop.
2296   BasicBlock *getPreheader() const;
2297 
2298   /// The header is the entry for each iteration. In the canonical control flow,
2299   /// it only contains the PHINode for the induction variable.
getHeader()2300   BasicBlock *getHeader() const {
2301     assert(isValid() && "Requires a valid canonical loop");
2302     return Header;
2303   }
2304 
2305   /// The condition block computes whether there is another loop iteration. If
2306   /// yes, branches to the body; otherwise to the exit block.
getCond()2307   BasicBlock *getCond() const {
2308     assert(isValid() && "Requires a valid canonical loop");
2309     return Cond;
2310   }
2311 
2312   /// The body block is the single entry for a loop iteration and not controlled
2313   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2314   /// eventually branch to the \p Latch block.
getBody()2315   BasicBlock *getBody() const {
2316     assert(isValid() && "Requires a valid canonical loop");
2317     return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2318   }
2319 
2320   /// Reaching the latch indicates the end of the loop body code. In the
2321   /// canonical control flow, it only contains the increment of the induction
2322   /// variable.
getLatch()2323   BasicBlock *getLatch() const {
2324     assert(isValid() && "Requires a valid canonical loop");
2325     return Latch;
2326   }
2327 
2328   /// Reaching the exit indicates no more iterations are being executed.
getExit()2329   BasicBlock *getExit() const {
2330     assert(isValid() && "Requires a valid canonical loop");
2331     return Exit;
2332   }
2333 
2334   /// The after block is intended for clean-up code such as lifetime end
2335   /// markers. It is separate from the exit block to ensure, analogous to the
2336   /// preheader, it having just a single entry edge and being free from PHI
2337   /// nodes should there be multiple loop exits (such as from break
2338   /// statements/cancellations).
getAfter()2339   BasicBlock *getAfter() const {
2340     assert(isValid() && "Requires a valid canonical loop");
2341     return Exit->getSingleSuccessor();
2342   }
2343 
2344   /// Returns the llvm::Value containing the number of loop iterations. It must
2345   /// be valid in the preheader and always interpreted as an unsigned integer of
2346   /// any bit-width.
getTripCount()2347   Value *getTripCount() const {
2348     assert(isValid() && "Requires a valid canonical loop");
2349     Instruction *CmpI = &Cond->front();
2350     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2351     return CmpI->getOperand(1);
2352   }
2353 
2354   /// Returns the instruction representing the current logical induction
2355   /// variable. Always unsigned, always starting at 0 with an increment of one.
getIndVar()2356   Instruction *getIndVar() const {
2357     assert(isValid() && "Requires a valid canonical loop");
2358     Instruction *IndVarPHI = &Header->front();
2359     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2360     return IndVarPHI;
2361   }
2362 
2363   /// Return the type of the induction variable (and the trip count).
getIndVarType()2364   Type *getIndVarType() const {
2365     assert(isValid() && "Requires a valid canonical loop");
2366     return getIndVar()->getType();
2367   }
2368 
2369   /// Return the insertion point for user code before the loop.
getPreheaderIP()2370   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
2371     assert(isValid() && "Requires a valid canonical loop");
2372     BasicBlock *Preheader = getPreheader();
2373     return {Preheader, std::prev(Preheader->end())};
2374   };
2375 
2376   /// Return the insertion point for user code in the body.
getBodyIP()2377   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
2378     assert(isValid() && "Requires a valid canonical loop");
2379     BasicBlock *Body = getBody();
2380     return {Body, Body->begin()};
2381   };
2382 
2383   /// Return the insertion point for user code after the loop.
getAfterIP()2384   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
2385     assert(isValid() && "Requires a valid canonical loop");
2386     BasicBlock *After = getAfter();
2387     return {After, After->begin()};
2388   };
2389 
getFunction()2390   Function *getFunction() const {
2391     assert(isValid() && "Requires a valid canonical loop");
2392     return Header->getParent();
2393   }
2394 
2395   /// Consistency self-check.
2396   void assertOK() const;
2397 
2398   /// Invalidate this loop. That is, the underlying IR does not fulfill the
2399   /// requirements of an OpenMP canonical loop anymore.
2400   void invalidate();
2401 };
2402 
2403 } // end namespace llvm
2404 
2405 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
2406