1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
10 // way to create LLVM instructions for OpenMP directives.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16 
17 #include "llvm/Frontend/OpenMP/OMPConstants.h"
18 #include "llvm/IR/DebugLoc.h"
19 #include "llvm/IR/IRBuilder.h"
20 #include "llvm/Support/Allocator.h"
21 #include <forward_list>
22 
23 namespace llvm {
24 class CanonicalLoopInfo;
25 
26 /// Move the instructions after an InsertPoint to the beginning of another
27 /// BasicBlock.
28 ///
29 /// The instructions after \p IP are moved to the beginning of \p New which must
30 /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
31 /// \p New will be added such that there is no semantic change. Otherwise, the
32 /// insert block of \p IP remains degenerate and it is up to the caller to
33 /// insert a terminator.
34 void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
35               bool CreateBranch);
36 
37 /// Splice \p New at \p Builder's current insertion point. \p Builder's new
38 /// insert location will stick to after the instruction before the insertion
39 /// point (instead of moving with the instruction the InsertPoint stores
40 /// internally).
41 void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
42 
43 /// Split a BasicBlock at an InsertPoint, even if the block is degenerate
44 /// (missing the terminator).
45 ///
46 /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
47 /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
48 /// is true, a branch to the new successor will be created such that
49 /// semantically there is no change; otherwise the block of the insertion point
50 /// remains degenerate and it is the caller's responsibility to insert a
51 /// terminator. Returns the new successor block.
52 BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
53                     llvm::Twine Name = {});
54 
55 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
56 /// degenerate (missing the terminator). \p Builder's new insert location will
57 /// stick to after the instruction before the insertion point (instead of moving
58 /// with the instruction the InsertPoint stores internally).
59 BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
60                     llvm::Twine Name = {});
61 
62 /// Split a BasicBlock at \p Builder's insertion point, even if the block is
63 /// degenerate (missing the terminator). \p Builder's new insert location sticks
64 /// to after the instruction before the insertion point (instead of moving with
65 /// the instruction the InsertPoint stores internally). \p Name is required here.
66 BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
67 
68 /// Like splitBB, but reuses the current block's name (presumably with \p Suffix appended; TODO confirm) for the new name.
69 BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
70                               llvm::Twine Suffix = ".split");
71 
72 /// An interface to create LLVM-IR for OpenMP directives.
73 ///
74 /// Each OpenMP directive has a corresponding public generator method.
75 class OpenMPIRBuilder {
76 public:
77   /// Create a new OpenMPIRBuilder operating on the given module \p M. Creating
78   /// the builder does not by itself have an effect on \p M (see initialize).
79   OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
80   ~OpenMPIRBuilder(); // Declared only here; defined out of line.
81 
82   /// Initialize the internal state. This will put structure types and
83   /// potentially other helpers into the underlying module. Must be called
84   /// before any other method and only once!
85   void initialize();
86 
87   /// Finalize the underlying module, e.g., by outlining regions.
88   /// \param Fn                    The function to be finalized. If not given
89   ///                              (nullptr), all functions are finalized.
90   void finalize(Function *Fn = nullptr);
91 
92   /// Add the attributes known for the runtime function \p FnID to \p Fn.
93   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
94 
95   /// Type used throughout for insertion points (IRBuilder<>::InsertPoint).
96   using InsertPointTy = IRBuilder<>::InsertPoint;
97 
98   /// Callback type for variable finalization (think destructors).
99   ///
100   /// \param CodeGenIP is the insertion point at which the finalization code
101   ///                  should be placed.
102   ///
103   /// A finalize callback knows about all objects that need finalization, e.g.
104   /// destruction, when the scope of the currently generated construct is left
105   /// at the time and location at which the callback is invoked.
106   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
107 
108   struct FinalizationInfo { // Entry type of the finalization stack.
109     /// The finalization callback provided by the last in-flight invocation of
110     /// createXXXX for the directive of kind DK.
111     FinalizeCallbackTy FiniCB;
112 
113     /// The directive kind of the innermost directive that has an associated
114     /// region which might require finalization when it is left.
115     omp::Directive DK;
116 
117     /// Flag to indicate if the directive is cancellable.
118     bool IsCancellable;
119   };
120 
121   /// Push a copy of the finalization info \p FI onto the finalization stack.
122   ///
123   /// NOTE: Temporary solution until Clang CG is gone.
124   void pushFinalizationCB(const FinalizationInfo &FI) {
125     FinalizationStack.push_back(FI);
126   }
127 
128   /// Pop the last finalization callback from the finalization stack. No
129   /// emptiness check is done here; presumably paired with pushFinalizationCB.
130   /// NOTE: Temporary solution until Clang CG is gone.
131   void popFinalizationCB() { FinalizationStack.pop_back(); }
132 
133   /// Callback type for body (=inner region) code generation.
134   ///
135   /// The callback takes code locations as arguments, each describing a
136   /// location where additional instructions can be inserted.
137   ///
138   /// The CodeGenIP may be in the middle of a basic block or point to the end of
139   /// it. The basic block may have a terminator or be degenerate. The callback
140   /// function may just insert instructions at that position, but also split the
141   /// block (without the Before argument of BasicBlock::splitBasicBlock such
142   /// that the identity of the split predecessor block is preserved) and insert
143   /// additional control flow, including branches that do not lead back to what
144   /// follows the CodeGenIP. Note that since the callback is allowed to split
145   /// the block, callers must assume that InsertPoints to positions in the
146   /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
147   /// such InsertPoints need to be preserved, the caller can split the block
148   /// itself before calling the callback.
149   ///
150   /// AllocaIP and CodeGenIP must not point to the same position.
151   ///
152   /// \param AllocaIP is the insertion point at which new alloca instructions
153   ///                 should be placed. The BasicBlock it is pointing to must
154   ///                 not be split.
155   /// \param CodeGenIP is the insertion point at which the body code should be
156   ///                  placed.
157   using BodyGenCallbackTy =
158       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
159 
160   // This is created primarily for the sections construct, as
161   // llvm::function_ref (BodyGenCallbackTy) is not storable (as described in
162   // the comments of the function_ref class - function_ref contains a
163   // non-ownable reference to the callable).
164   using StorableBodyGenCallbackTy =
165       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
166 
167   /// Callback type for loop body code generation (see createCanonicalLoop).
168   ///
169   /// \param CodeGenIP is the insertion point where the loop's body code must be
170   ///                  placed. This will be a dedicated BasicBlock with a
171   ///                  conditional branch from the loop condition check and
172   ///                  terminated with an unconditional branch to the loop
173   ///                  latch.
174   /// \param IndVar    is the induction variable usable at the insertion point.
175   using LoopBodyGenCallbackTy =
176       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
177 
178   /// Callback type for variable privatization (think copy & default
179   /// constructor).
180   ///
181   /// \param AllocaIP is the insertion point at which new alloca instructions
182   ///                 should be placed.
183   /// \param CodeGenIP is the insertion point at which the privatization code
184   ///                  should be placed.
185   /// \param Original The value being copied/created, should not be used in the
186   ///                 generated IR.
187   /// \param Inner The equivalent of \p Original that should be used in the
188   ///              generated IR; this is equal to \p Original if the value is
189   ///              a pointer and can thus be passed directly, otherwise it is
190   ///              an equivalent but different value.
191   /// \param ReplVal The replacement value, thus a copy or newly created
192   ///                version of \p Inner.
193   ///
194   /// \returns The new insertion point where code generation continues and
195   ///          \p ReplVal the replacement value.
196   using PrivatizeCallbackTy = function_ref<InsertPointTy(
197       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
198       Value &Inner, Value *&ReplVal)>;
199 
200   /// Description of an LLVM-IR insertion point (IP) and a debug/source location
201   /// (filename, line, column, ...). Implicitly constructible from an IRBuilderBase or an InsertPointTy.
202   struct LocationDescription {
203     LocationDescription(const IRBuilderBase &IRB)
204         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
205     LocationDescription(const InsertPointTy &IP) : IP(IP) {} // DL is default-constructed.
206     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
207         : IP(IP), DL(DL) {}
208     InsertPointTy IP;
209     DebugLoc DL;
210   };
211 
212   /// Emitter methods for OpenMP directives.
213   ///
214   ///{
215 
216   /// Generator for '#omp barrier'
217   ///
218   /// \param Loc The location where the barrier directive was encountered.
219   /// \param DK The kind of directive that caused the barrier.
220   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
221   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
222   ///                        should be checked and acted upon (default: true).
223   ///
224   /// \returns The insertion point after the barrier.
225   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
226                               bool ForceSimpleCall = false,
227                               bool CheckCancelFlag = true);
228 
229   /// Generator for '#omp cancel'
230   ///
231   /// \param Loc The location where the directive was encountered.
232   /// \param IfCondition The evaluated 'if' clause expression, if any.
233   /// \param CanceledDirective The kind of directive that is canceled.
234   ///
235   /// \returns The insertion point after the barrier.
236   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
237                              omp::Directive CanceledDirective);
238 
239   /// Generator for '#omp parallel'
240   ///
241   /// \param Loc The insert and source location description.
242   /// \param AllocaIP The insertion point to be used for alloca instructions.
243   /// \param BodyGenCB Callback that will generate the region code.
244   /// \param PrivCB Callback to copy a given variable (think copy constructor).
245   /// \param FiniCB Callback to finalize variable copies.
246   /// \param IfCondition The evaluated 'if' clause expression, if any.
247   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
248   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
249   /// \param IsCancellable Flag to indicate a cancellable parallel region.
250   ///
251   /// \returns The insertion position *after* the parallel.
252   IRBuilder<>::InsertPoint
253   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
254                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
255                  FinalizeCallbackTy FiniCB, Value *IfCondition,
256                  Value *NumThreads, omp::ProcBindKind ProcBind,
257                  bool IsCancellable);
258 
259   /// Generator for the control flow structure of an OpenMP canonical loop.
260   ///
261   /// This generator operates on the logical iteration space of the loop, i.e.
262   /// the caller only has to provide the trip count of the loop as defined by
263   /// base language semantics. The trip count is interpreted as an unsigned
264   /// integer. The induction variable passed to \p BodyGenCB will be of the same
265   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
266   /// convert the logical iteration variable to the loop counter variable in the
267   /// loop body.
268   ///
269   /// \param Loc       The insert and source location description. The insert
270   ///                  location can be between two instructions or the end of a
271   ///                  degenerate block (e.g. a BB under construction).
272   /// \param BodyGenCB Callback that will generate the loop body code.
273   /// \param TripCount Number of iterations the loop body is executed.
274   /// \param Name      Base name used to derive BB and instruction names.
275   ///
276   /// \returns An object representing the created control flow structure which
277   ///          can be used for loop-associated directives.
278   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
279                                          LoopBodyGenCallbackTy BodyGenCB,
280                                          Value *TripCount,
281                                          const Twine &Name = "loop");
282 
283   /// Generator for the control flow structure of an OpenMP canonical loop.
284   ///
285   /// Instead of a logical iteration space, this allows specifying user-defined
286   /// loop counter values using increment, upper- and lower bounds. To
287   /// disambiguate the terminology when counting downwards, instead of lower
288   /// bounds we use \p Start for the loop counter value in the first body
289   /// iteration.
290   ///
291   /// Consider the following limitations:
292   ///
293   ///  * A loop counter space over all integer values of its bit-width cannot be
294   ///    represented. E.g. using uint8_t, its loop trip count of 256 cannot be
295   ///    stored into an 8 bit integer:
296   ///
297   ///      DO I = 0, 255, 1
298   ///
299   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
300   ///    effectively counting downwards:
301   ///
302   ///      for (uint8_t i = 100u; i > 0; i += 127u)
303   ///
304   ///
305   /// TODO: May need to add additional parameters to represent:
306   ///
307   ///  * Allow representing downcounting with unsigned integers.
308   ///
309   ///  * Sign of the step and the comparison operator might disagree:
310   ///
311   ///      for (int i = 0; i < 42; i -= 1u)
312   ///
313   ///
314   /// \param Loc       The insert and source location description.
315   /// \param BodyGenCB Callback that will generate the loop body code.
316   /// \param Start     Value of the loop counter for the first iteration.
317   /// \param Stop      Loop counter values past this will stop the loop.
318   /// \param Step      Loop counter increment after each iteration; negative
319   ///                  means counting down.
320   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
321   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
322   ///                      counter.
323   /// \param ComputeIP Insertion point for instructions computing the trip
324   ///                  count. Can be used to ensure the trip count is available
325   ///                  at the outermost loop of a loop nest. If not set,
326   ///                  defaults to the preheader of the generated loop.
327   /// \param Name      Base name used to derive BB and instruction names.
328   ///
329   /// \returns An object representing the created control flow structure which
330   ///          can be used for loop-associated directives.
331   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
332                                          LoopBodyGenCallbackTy BodyGenCB,
333                                          Value *Start, Value *Stop, Value *Step,
334                                          bool IsSigned, bool InclusiveStop,
335                                          InsertPointTy ComputeIP = {},
336                                          const Twine &Name = "loop");
337 
338   /// Collapse a loop nest into a single loop.
339   ///
340   /// Merges loops of a loop nest into a single CanonicalLoopNest representation
341   /// that has the same number of innermost loop iterations as the original loop
342   /// nest. The induction variables of the input loops are derived from the
343   /// collapsed loop's induction variable. This is intended to be used to
344   /// implement OpenMP's collapse clause. Before applying a directive,
345   /// collapseLoops normalizes a loop nest to contain only a single loop and the
346   /// directive's implementation does not need to handle multiple loops itself.
347   /// This does not remove the need to handle all loop nest handling by
348   /// directives, such as the ordered(<n>) clause or the simd schedule-clause
349   /// modifier of the worksharing-loop directive.
350   ///
351   /// Example:
352   /// \code
353   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
354   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
355   ///       body(i, j);
356   /// \endcode
357   ///
358   /// After collapsing with Loops={i,j}, the loop is changed to
359   /// \code
360   ///   for (int ij = 0; ij < 63; ++ij) {
361   ///     int i = ij / 9;
362   ///     int j = ij % 9;
363   ///     body(i, j);
364   ///   }
365   /// \endcode
366   ///
367   /// In the current implementation, the following limitations apply:
368   ///
369   ///  * All input loops have an induction variable of the same type.
370   ///
371   ///  * The collapsed loop will have the same trip count integer type as the
372   ///    input loops. Therefore it is possible that the collapsed loop cannot
373   ///    represent all iterations of the input loops. For instance, assuming a
374   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
375   ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
376   ///    which cannot be represented in a 32-bit integer. Behavior is undefined
377   ///    in this case.
378   ///
379   ///  * The trip counts of every input loop must be available at \p ComputeIP.
380   ///    Non-rectangular loops are not yet supported.
381   ///
382   ///  * At each nest level, code between a surrounding loop and its nested loop
383   ///    is hoisted into the loop body, and such code will be executed more
384   ///    often than before collapsing (or not at all if any inner loop iteration
385   ///    has a trip count of 0). This is permitted by the OpenMP specification.
386   ///
387   /// \param DL        Debug location for instructions added for collapsing,
388   ///                  such as instructions to compute/derive the input loop's
389   ///                  induction variables.
390   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
391   ///                  from outermost-to-innermost and every control flow of a
392   ///                  loop's body must pass through its directly nested loop.
393   /// \param ComputeIP Where to insert additional instructions that compute the
394   ///                  collapsed trip count. If not set, defaults to before the
395   ///                  generated loop.
396   ///
397   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
398   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
399                                    ArrayRef<CanonicalLoopInfo *> Loops,
400                                    InsertPointTy ComputeIP);
401 
402 private:
403   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
404   ///
405   /// This takes a \p CLI representing a canonical loop, such as the one
406   /// created by createCanonicalLoop, and emits additional instructions to
407   /// turn it into a workshare loop. In particular, it calls to an OpenMP
408   /// runtime function in the preheader to obtain the loop bounds to be used in
409   /// the current thread, updates the relevant instructions in the canonical
410   /// loop and calls to an OpenMP runtime finalization function after the loop.
411   ///
412   /// \param DL       Debug location for instructions added for the
413   ///                 workshare-loop construct itself.
414   /// \param CLI      A descriptor of the canonical loop to workshare.
415   /// \param AllocaIP An insertion point for Alloca instructions usable in the
416   ///                 preheader of the loop.
417   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
418   ///                     the loop.
419   ///
420   /// \returns Point where to insert code after the workshare construct.
421   InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
422                                          InsertPointTy AllocaIP,
423                                          bool NeedsBarrier);
424 
425   /// Modifies the canonical loop to be a statically-scheduled workshare loop
426   /// with a user-specified chunk size.
427   ///
428   /// \param DL           Debug location for instructions added for the
429   ///                     workshare-loop construct itself.
430   /// \param CLI          A descriptor of the canonical loop to workshare.
431   /// \param AllocaIP     An insertion point for Alloca instructions usable in
432   ///                     the preheader of the loop.
433   /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
434   ///                     loop.
435   /// \param ChunkSize    The user-specified chunk size.
436   ///
437   /// \returns Point where to insert code after the workshare construct.
438   InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
439                                                 CanonicalLoopInfo *CLI,
440                                                 InsertPointTy AllocaIP,
441                                                 bool NeedsBarrier,
442                                                 Value *ChunkSize);
443 
444   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
445   ///
446   /// This takes a \p CLI representing a canonical loop, such as the one
447   /// created by createCanonicalLoop, and emits additional instructions to
448   /// turn it into a workshare loop. In particular, it calls to an OpenMP
449   /// runtime function in the preheader to obtain, and then in each iteration
450   /// to update the loop counter.
451   ///
452   /// \param DL       Debug location for instructions added for the
453   ///                 workshare-loop construct itself.
454   /// \param CLI      A descriptor of the canonical loop to workshare.
455   /// \param AllocaIP An insertion point for Alloca instructions usable in the
456   ///                 preheader of the loop.
457   /// \param SchedType Type of scheduling to be passed to the init function.
458   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
459   ///                     the loop.
460   /// \param Chunk    The size of loop chunk considered as a unit when
461   ///                 scheduling. If nullptr, defaults to 1.
462   ///
463   /// \returns Point where to insert code after the workshare construct.
464   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
465                                           InsertPointTy AllocaIP,
466                                           omp::OMPScheduleType SchedType,
467                                           bool NeedsBarrier,
468                                           Value *Chunk = nullptr);
469 
470 public:
471   /// Modifies the canonical loop to be a workshare loop.
472   ///
473   /// This takes a \p CLI representing a canonical loop, such as the one
474   /// created by createCanonicalLoop, and emits additional instructions to
475   /// turn it into a workshare loop. In particular, it calls to an OpenMP
476   /// runtime function in the preheader to obtain the loop bounds to be used in
477   /// the current thread, updates the relevant instructions in the canonical
478   /// loop and calls to an OpenMP runtime finalization function after the loop.
479   ///
480   /// The concrete transformation is done by applyStaticWorkshareLoop,
481   /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
482   /// on the value of \p SchedKind and \p ChunkSize.
483   ///
484   /// \param DL       Debug location for instructions added for the
485   ///                 workshare-loop construct itself.
486   /// \param CLI      A descriptor of the canonical loop to workshare.
487   /// \param AllocaIP An insertion point for Alloca instructions usable in the
488   ///                 preheader of the loop.
489   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
490   ///                     the loop.
491   /// \param SchedKind Scheduling algorithm to use.
492   /// \param ChunkSize The chunk size for the inner loop.
493   /// \param HasSimdModifier Whether the simd modifier is present in the
494   ///                        schedule clause.
495   /// \param HasMonotonicModifier Whether the monotonic modifier is present in
496   ///                             the schedule clause.
497   /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
498   ///                                present in the schedule clause.
499   /// \param HasOrderedClause Whether the (parameterless) ordered clause is
500   ///                         present.
501   ///
502   /// \returns Point where to insert code after the workshare construct.
503   InsertPointTy applyWorkshareLoop(
504       DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
505       bool NeedsBarrier,
506       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
507       Value *ChunkSize = nullptr, bool HasSimdModifier = false,
508       bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
509       bool HasOrderedClause = false);
510 
511   /// Tile a loop nest.
512   ///
513   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
514   /// \p Loops must be perfectly nested, from outermost to innermost loop
515   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
516   /// of every loop and every tile size must be usable in the outermost
517   /// loop's preheader. This implies that the loop nest is rectangular.
518   ///
519   /// Example:
520   /// \code
521   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
522   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
523   ///         body(i, j);
524   /// \endcode
525   ///
526   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
527   /// \code
528   ///   for (int i1 = 0; i1 < 3; ++i1)
529   ///     for (int j1 = 0; j1 < 2; ++j1)
530   ///       for (int i2 = 0; i2 < 5; ++i2)
531   ///         for (int j2 = 0; j2 < 7; ++j2)
532   ///           body(i1*5+i2, j1*7+j2);
533   /// \endcode
534   ///
535   /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are
536   /// referred to as the floor, and the loops i2 and j2 are the tiles. Tiling
537   /// also handles non-constant trip counts, non-constant tile sizes and trip
538   /// counts that are not multiples of the tile size. In the latter case the
539   /// tile loop of the last floor-loop iteration will have fewer iterations than
540   /// specified as its tile size.
541   ///
542   ///
543   /// \param DL        Debug location for instructions added by tiling, for
544   ///                  instance the floor- and tile trip count computation.
545   /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
546   ///                  invalidated by this method, i.e. should not be used after
547   ///                  tiling.
548   /// \param TileSizes For each loop in \p Loops, the tile size for that
549   ///                  dimension.
550   ///
551   /// \returns A list of generated loops. Contains twice as many loops as the
552   ///          input loop nest; the first half are the floor loops and the
553   ///          second half are the tile loops.
554   std::vector<CanonicalLoopInfo *>
555   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
556             ArrayRef<Value *> TileSizes);
557 
558   /// Fully unroll a loop.
559   ///
560   /// Instead of unrolling the loop immediately (and duplicating its body
561   /// instructions), unrolling is deferred to LLVM's LoopUnrollPass by adding
562   /// loop metadata.
563   ///
564   /// \param DL   Debug location for instructions added by unrolling.
565   /// \param Loop The loop to unroll. The loop will be invalidated.
566   void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
567 
568   /// Fully or partially unroll a loop. Whether and how the loop is unrolled is
569   /// determined using LLVM's LoopUnrollPass.
570   ///
571   /// \param DL   Debug location for instructions added by unrolling.
572   /// \param Loop The loop to unroll. The loop will be invalidated.
573   void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
574 
575   /// Partially unroll a loop.
576   ///
577   /// The CanonicalLoopInfo of the unrolled loop for use with chained
578   /// loop-associated directives can be requested using \p UnrolledCLI. Not
579   /// needing the CanonicalLoopInfo allows more efficient code generation by
580   /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
581   /// A loop-associated directive applied to the unrolled loop needs to know the
582   /// new trip count which means that if using a heuristically determined unroll
583   /// factor (\p Factor == 0), that factor must be computed immediately. We are
584   /// using the same logic as the LoopUnrollPass to derive the unroll factor,
585   /// but which assumes that some canonicalization has taken place (e.g.
586   /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
587   /// better when the unrolled loop's CanonicalLoopInfo is not needed.
588   ///
589   /// \param DL          Debug location for instructions added by unrolling.
590   /// \param Loop        The loop to unroll. The loop will be invalidated.
591   /// \param Factor      The factor to unroll the loop by. A factor of 0
592   ///                    indicates that a heuristic should be used to determine
593   ///                    the unroll-factor.
594   /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
595   ///                    partially unrolled loop. Otherwise, uses loop metadata
596   ///                    to defer unrolling to the LoopUnrollPass.
597   void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
598                          CanonicalLoopInfo **UnrolledCLI);
599 
600   /// Add metadata to simd-ize a loop.
601   ///
602   /// \param Loop    The loop to simd-ize.
603   /// \param Simdlen The Simdlen length to apply to the simd loop.
604   void applySimd(CanonicalLoopInfo *Loop, ConstantInt *Simdlen);
605 
606   /// Generator for '#omp flush'
607   ///
608   /// \param Loc The location where the flush directive was encountered
609   void createFlush(const LocationDescription &Loc);
610 
611   /// Generator for '#omp taskwait'
612   ///
613   /// \param Loc The location where the taskwait directive was encountered.
614   void createTaskwait(const LocationDescription &Loc);
615 
616   /// Generator for '#omp taskyield'
617   ///
618   /// \param Loc The location where the taskyield directive was encountered.
619   void createTaskyield(const LocationDescription &Loc);
620 
621   /// Generator for `#omp task`
622   ///
623   /// \param Loc The location where the task construct was encountered.
624   /// \param AllocaIP The insertion point to be used for alloca instructions.
625   /// \param BodyGenCB Callback that will generate the region code.
626   /// \param Tied True if the task is tied, false if the task is untied.
627   /// \param Final i1 value which is `true` if the task is final, `false` if the
628   ///              task is not final.
629   InsertPointTy createTask(const LocationDescription &Loc,
630                            InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
631                            bool Tied = true, Value *Final = nullptr);
632 
633   /// Generator for the taskgroup construct
634   ///
635   /// \param Loc The location where the taskgroup construct was encountered.
636   /// \param AllocaIP The insertion point to be used for alloca instructions.
637   /// \param BodyGenCB Callback that will generate the region code.
638   InsertPointTy createTaskgroup(const LocationDescription &Loc,
639                                 InsertPointTy AllocaIP,
640                                 BodyGenCallbackTy BodyGenCB);
641 
642   /// Functions used to generate reductions. Such functions take two Values
643   /// representing LHS and RHS of the reduction, respectively, and a reference
644   /// to the value that is updated to refer to the reduction result.
645   using ReductionGenTy =
646       function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
647 
648   /// Functions used to generate atomic reductions. Such functions take two
649   /// Values representing pointers to LHS and RHS of the reduction, as well as
650   /// the element type of these pointers. They are expected to atomically
651   /// update the LHS to the reduced value.
652   using AtomicReductionGenTy =
653       function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
654 
  /// Information about an OpenMP reduction.
  ///
  /// Bundles the element type, the original and the privatized variable, and
  /// the two code-generation callbacks that describe a single reduction.
  struct ReductionInfo {
    /// Store all fields verbatim.
    ///
    /// \param ElementType        Element type of the reduction; checked
    ///                           against the type of \p Variable.
    /// \param Variable           The reduction variable; must have pointer
    ///                           type.
    /// \param PrivateVariable    The thread-private partial reduction variable.
    /// \param ReductionGen       Callback generating the non-atomic reduction.
    /// \param AtomicReductionGen Callback generating the atomic reduction.
    ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
                  ReductionGenTy ReductionGen,
                  AtomicReductionGenTy AtomicReductionGen)
        : ElementType(ElementType), Variable(Variable),
          PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
          AtomicReductionGen(AtomicReductionGen) {
      // Sanity check (assert builds only): the variable must be a pointer that
      // is either opaque or whose pointee type equals ElementType.
      assert(cast<PointerType>(Variable->getType())
          ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
    }

    /// Reduction element type, must match pointee type of variable.
    Type *ElementType;

    /// Reduction variable of pointer type.
    Value *Variable;

    /// Thread-private partial reduction variable.
    Value *PrivateVariable;

    /// Callback for generating the reduction body. The IR produced by this will
    /// be used to combine two values in a thread-safe context, e.g., under
    /// lock or within the same thread, and therefore need not be atomic.
    ReductionGenTy ReductionGen;

    /// Callback for generating the atomic reduction body, may be null. The IR
    /// produced by this will be used to atomically combine two values during
    /// reduction. If null, the implementation will use the non-atomic version
    /// along with the appropriate synchronization mechanisms.
    AtomicReductionGenTy AtomicReductionGen;
  };
687 
688   // TODO: provide atomic and non-atomic reduction generators for reduction
689   // operators defined by the OpenMP specification.
690 
691   /// Generator for '#omp reduction'.
692   ///
693   /// Emits the IR instructing the runtime to perform the specific kind of
694   /// reductions. Expects reduction variables to have been privatized and
695   /// initialized to reduction-neutral values separately. Emits the calls to
696   /// runtime functions as well as the reduction function and the basic blocks
697   /// performing the reduction atomically and non-atomically.
698   ///
699   /// The code emitted for the following:
700   ///
701   /// \code
702   ///   type var_1;
703   ///   type var_2;
704   ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
705   ///   /* body */;
706   /// \endcode
707   ///
708   /// corresponds to the following sketch.
709   ///
710   /// \code
711   /// void _outlined_par() {
712   ///   // N is the number of different reductions.
713   ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
714   ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
715   ///                        _omp_reduction_func,
716   ///                        _gomp_critical_user.reduction.var)) {
717   ///   case 1: {
718   ///     var_1 = var_1 <reduction-op> privatized_var_1;
719   ///     var_2 = var_2 <reduction-op> privatized_var_2;
720   ///     // ...
721   ///    __kmpc_end_reduce(...);
722   ///     break;
723   ///   }
724   ///   case 2: {
725   ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
726   ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
727   ///     // ...
728   ///     break;
729   ///   }
730   ///   default: break;
731   ///   }
732   /// }
733   ///
734   /// void _omp_reduction_func(void **lhs, void **rhs) {
735   ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
736   ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
737   ///   // ...
738   /// }
739   /// \endcode
740   ///
741   /// \param Loc                The location where the reduction was
742   ///                           encountered. Must be within the associate
743   ///                           directive and after the last local access to the
744   ///                           reduction variables.
745   /// \param AllocaIP           An insertion point suitable for allocas usable
746   ///                           in reductions.
747   /// \param ReductionInfos     A list of info on each reduction variable.
748   /// \param IsNoWait           A flag set if the reduction is marked as nowait.
749   InsertPointTy createReductions(const LocationDescription &Loc,
750                                  InsertPointTy AllocaIP,
751                                  ArrayRef<ReductionInfo> ReductionInfos,
752                                  bool IsNoWait = false);
753 
754   ///}
755 
756   /// Return the insertion point used by the underlying IRBuilder.
757   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
758 
759   /// Update the internal location to \p Loc.
760   bool updateToLocation(const LocationDescription &Loc) {
761     Builder.restoreIP(Loc.IP);
762     Builder.SetCurrentDebugLocation(Loc.DL);
763     return Loc.IP.getBlock() != nullptr;
764   }
765 
766   /// Return the function declaration for the runtime function with \p FnID.
767   FunctionCallee getOrCreateRuntimeFunction(Module &M,
768                                             omp::RuntimeFunction FnID);
769 
  /// Return the runtime function identified by \p FnID as a plain Function
  /// pointer.
  ///
  /// NOTE(review): unlike getOrCreateRuntimeFunction this overload takes no
  /// Module argument; presumably it operates on the builder's own module --
  /// confirm against the implementation.
  Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
771 
772   /// Return the (LLVM-IR) string describing the source location \p LocStr.
773   Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
774 
775   /// Return the (LLVM-IR) string describing the default source location.
776   Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
777 
778   /// Return the (LLVM-IR) string describing the source location identified by
779   /// the arguments.
780   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
781                                  unsigned Line, unsigned Column,
782                                  uint32_t &SrcLocStrSize);
783 
784   /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
785   /// fallback if \p DL does not specify the function name.
786   Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
787                                  Function *F = nullptr);
788 
789   /// Return the (LLVM-IR) string describing the source location \p Loc.
790   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
791                                  uint32_t &SrcLocStrSize);
792 
793   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  /// TODO: Create an enum class for the Reserve2Flags
795   Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
796                              omp::IdentFlag Flags = omp::IdentFlag(0),
797                              unsigned Reserve2Flags = 0);
798 
799   /// Create a hidden global flag \p Name in the module with initial value \p
800   /// Value.
801   GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
802 
803   /// Create an offloading section struct used to register this global at
804   /// runtime.
805   ///
806   /// Type struct __tgt_offload_entry{
807   ///   void    *addr;      // Pointer to the offload entry info.
808   ///                       // (function or global)
809   ///   char    *name;      // Name of the function or global.
  ///   size_t  size;       // Size of the entry info (0 if it is a function).
811   ///   int32_t flags;
812   ///   int32_t reserved;
813   /// };
814   ///
815   /// \param Addr The pointer to the global being registered.
816   /// \param Name The symbol name associated with the global.
817   /// \param Size The size in bytes of the global (0 for functions).
818   /// \param Flags Flags associated with the entry.
819   /// \param SectionName The section this entry will be placed at.
820   void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
821                            int32_t Flags,
822                            StringRef SectionName = "omp_offloading_entries");
823 
824   /// Generate control flow and cleanup for cancellation.
825   ///
826   /// \param CancelFlag Flag indicating if the cancellation is performed.
  /// \param CanceledDirective The kind of directive that is canceled.
828   /// \param ExitCB Extra code to be generated in the exit block.
829   void emitCancelationCheckImpl(Value *CancelFlag,
830                                 omp::Directive CanceledDirective,
831                                 FinalizeCallbackTy ExitCB = {});
832 
  /// Generate a target region entry call.
  ///
  /// \param Loc The location at which the request originated and is fulfilled.
  /// \param Return Return value of the created function returned by reference.
  /// \param Ident NOTE(review): presumably the ident_t* describing the source
  ///              location of the target region; confirm against the
  ///              implementation.
  /// \param DeviceID Identifier for the device via the 'device' clause.
  /// \param NumTeams Number of teams for the region via the 'num_teams' clause
  ///                 or 0 if unspecified and -1 if there is no 'teams' clause.
  /// \param NumThreads Number of threads via the 'thread_limit' clause.
  /// \param HostPtr Pointer to the host-side pointer of the target kernel.
  /// \param KernelArgs Array of arguments to the kernel.
  /// \param NoWaitArgs Optional array of arguments to the nowait kernel.
  InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
                                 Value *Ident, Value *DeviceID, Value *NumTeams,
                                 Value *NumThreads, Value *HostPtr,
                                 ArrayRef<Value *> KernelArgs,
                                 ArrayRef<Value *> NoWaitArgs = {});
849 
850   /// Generate a barrier runtime call.
851   ///
852   /// \param Loc The location at which the request originated and is fulfilled.
853   /// \param DK The directive which caused the barrier
854   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
855   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
856   ///                        should be checked and acted upon.
857   ///
858   /// \returns The insertion point after the barrier.
859   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
860                                 omp::Directive DK, bool ForceSimpleCall,
861                                 bool CheckCancelFlag);
862 
863   /// Generate a flush runtime call.
864   ///
865   /// \param Loc The location at which the request originated and is fulfilled.
866   void emitFlush(const LocationDescription &Loc);
867 
868   /// The finalization stack made up of finalize callbacks currently in-flight,
869   /// wrapped into FinalizationInfo objects that reference also the finalization
870   /// target block and the kind of cancellable directive.
871   SmallVector<FinalizationInfo, 8> FinalizationStack;
872 
873   /// Return true if the last entry in the finalization stack is of kind \p DK
874   /// and cancellable.
875   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
876     return !FinalizationStack.empty() &&
877            FinalizationStack.back().IsCancellable &&
878            FinalizationStack.back().DK == DK;
879   }
880 
881   /// Generate a taskwait runtime call.
882   ///
883   /// \param Loc The location at which the request originated and is fulfilled.
884   void emitTaskwaitImpl(const LocationDescription &Loc);
885 
886   /// Generate a taskyield runtime call.
887   ///
888   /// \param Loc The location at which the request originated and is fulfilled.
889   void emitTaskyieldImpl(const LocationDescription &Loc);
890 
891   /// Return the current thread ID.
892   ///
893   /// \param Ident The ident (ident_t*) describing the query origin.
894   Value *getOrCreateThreadID(Value *Ident);
895 
896   /// The underlying LLVM-IR module
897   Module &M;
898 
899   /// The LLVM-IR Builder used to create IR.
900   IRBuilder<> Builder;
901 
902   /// Map to remember source location strings
903   StringMap<Constant *> SrcLocStrMap;
904 
905   /// Map to remember existing ident_t*.
906   DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
907 
908   /// Helper that contains information about regions we need to outline
909   /// during finalization.
910   struct OutlineInfo {
911     using PostOutlineCBTy = std::function<void(Function &)>;
912     PostOutlineCBTy PostOutlineCB;
913     BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
914     SmallVector<Value *, 2> ExcludeArgsFromAggregate;
915 
916     /// Collect all blocks in between EntryBB and ExitBB in both the given
917     /// vector and set.
918     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
919                        SmallVectorImpl<BasicBlock *> &BlockVector);
920 
921     /// Return the function that contains the region to be outlined.
922     Function *getFunction() const { return EntryBB->getParent(); }
923   };
924 
925   /// Collection of regions that need to be outlined during finalization.
926   SmallVector<OutlineInfo, 16> OutlineInfos;
927 
928   /// Collection of owned canonical loop objects that eventually need to be
929   /// free'd.
930   std::forward_list<CanonicalLoopInfo> LoopInfos;
931 
932   /// Add a new region that will be outlined later.
933   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
934 
935   /// An ordered map of auto-generated variables to their unique names.
936   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
937   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
938   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
939   /// variables.
940   StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
941 
  /// Create the global variable holding the offload mappings information.
  ///
  /// \param Mappings The 64-bit mapping values to store. NOTE(review):
  ///                 presumably one map-type flag word per mapped operand;
  ///                 confirm against the implementation.
  /// \param VarName  NOTE(review): presumably the name given to the created
  ///                 global variable; confirm against the implementation.
  GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                        std::string VarName);
945 
  /// Create the global variable holding the offload names information.
  ///
  /// \param Names   The constants to store. NOTE(review): presumably one name
  ///                per mapped operand; confirm against the implementation.
  /// \param VarName NOTE(review): presumably the name given to the created
  ///                global variable; confirm against the implementation.
  GlobalVariable *
  createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                        std::string VarName);
950 
  /// Bundle of alloca instructions passed by reference to createMapperAllocas
  /// and emitMapperCall. Every member starts out as nullptr.
  struct MapperAllocas {
    // NOTE(review): presumably the alloca for the array of base pointers;
    // confirm against createMapperAllocas.
    AllocaInst *ArgsBase = nullptr;
    // NOTE(review): presumably the alloca for the array of argument pointers.
    AllocaInst *Args = nullptr;
    // NOTE(review): presumably the alloca for the array of argument sizes.
    AllocaInst *ArgSizes = nullptr;
  };
956 
  /// Create the alloca instructions used in calls to mapper functions.
  ///
  /// \param Loc The source location description.
  /// \param AllocaIP The insertion point to be used for alloca instructions.
  /// \param NumOperands NOTE(review): presumably the number of operands each
  ///                    created array has room for; confirm against the
  ///                    implementation.
  /// \param MapperAllocas Passed by non-const reference; NOTE(review):
  ///                      presumably receives the created alloca instructions.
  void createMapperAllocas(const LocationDescription &Loc,
                           InsertPointTy AllocaIP, unsigned NumOperands,
                           struct MapperAllocas &MapperAllocas);
961 
962   /// Create the call for the target mapper function.
963   /// \param Loc The source location description.
964   /// \param MapperFunc Function to be called.
965   /// \param SrcLocInfo Source location information global.
966   /// \param MaptypesArg The argument types.
967   /// \param MapnamesArg The argument names.
968   /// \param MapperAllocas The AllocaInst used for the call.
969   /// \param DeviceID Device ID for the call.
970   /// \param NumOperands Number of operands in the call.
971   void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
972                       Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
973                       struct MapperAllocas &MapperAllocas, int64_t DeviceID,
974                       unsigned NumOperands);
975 
976 public:
  /// Generator for __kmpc_copyprivate
  ///
  /// \param Loc The source location description.
  /// \param BufSize Number of elements in the buffer.
  /// \param CpyBuf List of pointers to data to be copied.
  /// \param CpyFn Function to call for copying data.
  /// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
  ///
  /// \return The insertion position *after* the CopyPrivate call.
  InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                  llvm::Value *BufSize, llvm::Value *CpyBuf,
                                  llvm::Value *CpyFn, llvm::Value *DidIt);
990 
991   /// Generator for '#omp single'
992   ///
993   /// \param Loc The source location description.
994   /// \param BodyGenCB Callback that will generate the region code.
995   /// \param FiniCB Callback to finalize variable copies.
996   /// \param IsNowait If false, a barrier is emitted.
997   /// \param DidIt Local variable used as a flag to indicate 'single' thread
998   ///
999   /// \returns The insertion position *after* the single call.
1000   InsertPointTy createSingle(const LocationDescription &Loc,
1001                              BodyGenCallbackTy BodyGenCB,
1002                              FinalizeCallbackTy FiniCB, bool IsNowait,
1003                              llvm::Value *DidIt);
1004 
1005   /// Generator for '#omp master'
1006   ///
1007   /// \param Loc The insert and source location description.
1008   /// \param BodyGenCB Callback that will generate the region code.
1009   /// \param FiniCB Callback to finalize variable copies.
1010   ///
1011   /// \returns The insertion position *after* the master.
1012   InsertPointTy createMaster(const LocationDescription &Loc,
1013                              BodyGenCallbackTy BodyGenCB,
1014                              FinalizeCallbackTy FiniCB);
1015 
  /// Generator for '#omp masked'
  ///
  /// \param Loc The insert and source location description.
  /// \param BodyGenCB Callback that will generate the region code.
  /// \param FiniCB Callback to finalize variable copies.
  /// \param Filter NOTE(review): presumably the value of the 'filter' clause
  ///               selecting the thread that executes the region; confirm
  ///               against the implementation.
  ///
  /// \returns The insertion position *after* the masked.
  InsertPointTy createMasked(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB, Value *Filter);
1026 
1027   /// Generator for '#omp critical'
1028   ///
1029   /// \param Loc The insert and source location description.
1030   /// \param BodyGenCB Callback that will generate the region body code.
1031   /// \param FiniCB Callback to finalize variable copies.
1032   /// \param CriticalName name of the lock used by the critical directive
1033   /// \param HintInst Hint Instruction for hint clause associated with critical
1034   ///
1035   /// \returns The insertion position *after* the critical.
1036   InsertPointTy createCritical(const LocationDescription &Loc,
1037                                BodyGenCallbackTy BodyGenCB,
1038                                FinalizeCallbackTy FiniCB,
1039                                StringRef CriticalName, Value *HintInst);
1040 
1041   /// Generator for '#omp ordered depend (source | sink)'
1042   ///
1043   /// \param Loc The insert and source location description.
1044   /// \param AllocaIP The insertion point to be used for alloca instructions.
1045   /// \param NumLoops The number of loops in depend clause.
1046   /// \param StoreValues The value will be stored in vector address.
1047   /// \param Name The name of alloca instruction.
1048   /// \param IsDependSource If true, depend source; otherwise, depend sink.
1049   ///
1050   /// \return The insertion position *after* the ordered.
1051   InsertPointTy createOrderedDepend(const LocationDescription &Loc,
1052                                     InsertPointTy AllocaIP, unsigned NumLoops,
1053                                     ArrayRef<llvm::Value *> StoreValues,
1054                                     const Twine &Name, bool IsDependSource);
1055 
1056   /// Generator for '#omp ordered [threads | simd]'
1057   ///
1058   /// \param Loc The insert and source location description.
1059   /// \param BodyGenCB Callback that will generate the region code.
1060   /// \param FiniCB Callback to finalize variable copies.
1061   /// \param IsThreads If true, with threads clause or without clause;
1062   /// otherwise, with simd clause;
1063   ///
1064   /// \returns The insertion position *after* the ordered.
1065   InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
1066                                          BodyGenCallbackTy BodyGenCB,
1067                                          FinalizeCallbackTy FiniCB,
1068                                          bool IsThreads);
1069 
1070   /// Generator for '#omp sections'
1071   ///
1072   /// \param Loc The insert and source location description.
1073   /// \param AllocaIP The insertion points to be used for alloca instructions.
1074   /// \param SectionCBs Callbacks that will generate body of each section.
1075   /// \param PrivCB Callback to copy a given variable (think copy constructor).
1076   /// \param FiniCB Callback to finalize variable copies.
1077   /// \param IsCancellable Flag to indicate a cancellable parallel region.
1078   /// \param IsNowait If true, barrier - to ensure all sections are executed
1079   /// before moving forward will not be generated.
1080   /// \returns The insertion position *after* the sections.
1081   InsertPointTy createSections(const LocationDescription &Loc,
1082                                InsertPointTy AllocaIP,
1083                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
1084                                PrivatizeCallbackTy PrivCB,
1085                                FinalizeCallbackTy FiniCB, bool IsCancellable,
1086                                bool IsNowait);
1087 
1088   /// Generator for '#omp section'
1089   ///
1090   /// \param Loc The insert and source location description.
1091   /// \param BodyGenCB Callback that will generate the region body code.
1092   /// \param FiniCB Callback to finalize variable copies.
1093   /// \returns The insertion position *after* the section.
1094   InsertPointTy createSection(const LocationDescription &Loc,
1095                               BodyGenCallbackTy BodyGenCB,
1096                               FinalizeCallbackTy FiniCB);
1097 
  /// Generate conditional branch and relevant BasicBlocks through which private
  /// threads copy the 'copyin' variables from Master copy to threadprivate
  /// copies.
  ///
  /// \param IP insertion block for copyin conditional
  /// \param MasterAddr a pointer to the master variable
  /// \param PrivateAddr a pointer to the threadprivate variable
  /// \param IntPtrTy Pointer size type
  /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
  ///                    and copy.in.end block
  ///
  /// \returns The insertion point where copying operation to be emitted.
  InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                         Value *PrivateAddr,
                                         llvm::IntegerType *IntPtrTy,
                                         bool BranchtoEnd = true);
1114 
1115   /// Create a runtime call for kmpc_Alloc
1116   ///
1117   /// \param Loc The insert and source location description.
1118   /// \param Size Size of allocated memory space
1119   /// \param Allocator Allocator information instruction
1120   /// \param Name Name of call Instruction for OMP_alloc
1121   ///
1122   /// \returns CallInst to the OMP_Alloc call
1123   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
1124                            Value *Allocator, std::string Name = "");
1125 
1126   /// Create a runtime call for kmpc_free
1127   ///
1128   /// \param Loc The insert and source location description.
1129   /// \param Addr Address of memory space to be freed
1130   /// \param Allocator Allocator information instruction
1131   /// \param Name Name of call Instruction for OMP_Free
1132   ///
1133   /// \returns CallInst to the OMP_Free call
1134   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
1135                           Value *Allocator, std::string Name = "");
1136 
1137   /// Create a runtime call for kmpc_threadprivate_cached
1138   ///
1139   /// \param Loc The insert and source location description.
1140   /// \param Pointer pointer to data to be cached
1141   /// \param Size size of data to be cached
1142   /// \param Name Name of call Instruction for callinst
1143   ///
1144   /// \returns CallInst to the thread private cache call.
1145   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
1146                                       llvm::Value *Pointer,
1147                                       llvm::ConstantInt *Size,
1148                                       const llvm::Twine &Name = Twine(""));
1149 
1150   /// Create a runtime call for __tgt_interop_init
1151   ///
1152   /// \param Loc The insert and source location description.
1153   /// \param InteropVar variable to be allocated
1154   /// \param InteropType type of interop operation
  /// \param Device device to which offloading will occur
1156   /// \param NumDependences  number of dependence variables
1157   /// \param DependenceAddress pointer to dependence variables
1158   /// \param HaveNowaitClause does nowait clause exist
1159   ///
1160   /// \returns CallInst to the __tgt_interop_init call
1161   CallInst *createOMPInteropInit(const LocationDescription &Loc,
1162                                  Value *InteropVar,
1163                                  omp::OMPInteropType InteropType, Value *Device,
1164                                  Value *NumDependences,
1165                                  Value *DependenceAddress,
1166                                  bool HaveNowaitClause);
1167 
1168   /// Create a runtime call for __tgt_interop_destroy
1169   ///
1170   /// \param Loc The insert and source location description.
1171   /// \param InteropVar variable to be allocated
  /// \param Device device to which offloading will occur
1173   /// \param NumDependences  number of dependence variables
1174   /// \param DependenceAddress pointer to dependence variables
1175   /// \param HaveNowaitClause does nowait clause exist
1176   ///
1177   /// \returns CallInst to the __tgt_interop_destroy call
1178   CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
1179                                     Value *InteropVar, Value *Device,
1180                                     Value *NumDependences,
1181                                     Value *DependenceAddress,
1182                                     bool HaveNowaitClause);
1183 
1184   /// Create a runtime call for __tgt_interop_use
1185   ///
1186   /// \param Loc The insert and source location description.
1187   /// \param InteropVar variable to be allocated
  /// \param Device device to which offloading will occur
1189   /// \param NumDependences  number of dependence variables
1190   /// \param DependenceAddress pointer to dependence variables
1191   /// \param HaveNowaitClause does nowait clause exist
1192   ///
1193   /// \returns CallInst to the __tgt_interop_use call
1194   CallInst *createOMPInteropUse(const LocationDescription &Loc,
1195                                 Value *InteropVar, Value *Device,
1196                                 Value *NumDependences, Value *DependenceAddress,
1197                                 bool HaveNowaitClause);
1198 
1199   /// The `omp target` interface
1200   ///
1201   /// For more information about the usage of this interface,
1202   /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
1203   ///
1204   ///{
1205 
1206   /// Create a runtime call for kmpc_target_init
1207   ///
1208   /// \param Loc The insert and source location description.
1209   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1210   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
1211   InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
1212                                  bool RequiresFullRuntime);
1213 
1214   /// Create a runtime call for kmpc_target_deinit
1215   ///
1216   /// \param Loc The insert and source location description.
1217   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1218   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
1219   void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
1220                           bool RequiresFullRuntime);
1221 
1222   ///}
1223 
  /// Declarations for LLVM-IR types (simple, array, function and structure) are
  /// generated below. Their names are defined and used in OMPKinds.def (included
  /// right after the macro definitions). Here we provide the declarations, the
  /// initializeTypes function will provide the values.
  ///
  /// Every generated member is initialized to nullptr. Per entry kind:
  ///  * OMP_TYPE:          `Type *VarName`
  ///  * OMP_ARRAY_TYPE:    `ArrayType *VarName##Ty` and
  ///                       `PointerType *VarName##PtrTy`
  ///  * OMP_FUNCTION_TYPE: `FunctionType *VarName` and
  ///                       `PointerType *VarName##Ptr`
  ///  * OMP_STRUCT_TYPE:   `StructType *VarName` and
  ///                       `PointerType *VarName##Ptr`
  ///
  ///{
#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  ArrayType *VarName##Ty = nullptr;                                            \
  PointerType *VarName##PtrTy = nullptr;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  FunctionType *VarName = nullptr;                                             \
  PointerType *VarName##Ptr = nullptr;
#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
  StructType *VarName = nullptr;                                               \
  PointerType *VarName##Ptr = nullptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
1241 
1242   ///}
1243 
1244 private:
  /// Create all simple and struct types exposed by the runtime and remember
  /// the llvm::PointerTypes of them for easy access later.
  ///
  /// \param M The module to initialize the types for.
  ///          NOTE(review): presumably the module this builder operates on --
  ///          confirm against the implementation.
  void initializeTypes(Module &M);
1248 
  /// Common interface for generating entry calls for OMP Directives.
  /// If the directive has a region/body, it will set the insertion
  /// point to the body.
  ///
  /// \param OMPD Directive to generate entry blocks for.
  /// \param EntryCall Call to the entry OMP Runtime Function.
  /// \param ExitBB Block where the region ends.
  /// \param Conditional Indicate if the entry call result will be used
  ///        to evaluate a conditional of whether a thread will execute
  ///        body code or not. Defaults to false.
  ///
  /// \return The insertion position in exit block
  InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
                                         BasicBlock *ExitBB,
                                         bool Conditional = false);
1264 
  /// Common interface to finalize the region.
  ///
  /// \param OMPD Directive to generate exiting code for.
  /// \param FinIP Insertion point for emitting Finalization code and exit call.
  /// \param ExitCall Call to the ending OMP Runtime Function.
  /// \param HasFinalize Indicate if the directive will require finalization
  ///        and has a finalization callback in the stack that
  ///        should be called. Defaults to true.
  ///
  /// \return The insertion position in exit block
  InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                        InsertPointTy FinIP,
                                        Instruction *ExitCall,
                                        bool HasFinalize = true);
1279 
  /// Common interface to generate OMP inlined regions.
  ///
  /// \param OMPD Directive to generate inlined region for.
  /// \param EntryCall Call to the entry OMP Runtime Function.
  /// \param ExitCall Call to the ending OMP Runtime Function.
  /// \param BodyGenCB Body code generation callback.
  /// \param FiniCB Finalization Callback. Will be called when finalizing region.
  /// \param Conditional Indicate if the entry call result will be used
  ///        to evaluate a conditional of whether a thread will execute
  ///        body code or not. Defaults to false.
  /// \param HasFinalize Indicate if the directive will require finalization
  ///        and has a finalization callback in the stack that
  ///        should be called. Defaults to true.
  /// \param IsCancellable If HasFinalize is set to true, indicate if the
  ///        directive should be cancellable. Defaults to false.
  ///
  /// \return The insertion point after the region
  InsertPointTy
  EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                       Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                       FinalizeCallbackTy FiniCB, bool Conditional = false,
                       bool HasFinalize = true, bool IsCancellable = false);
1302 
  /// Get a name composed of \p Parts, joined with the given (platform-specific)
  /// separators.
  ///
  /// \param Parts different parts of the final name that needs separation
  /// \param FirstSeparator First separator used between the initial two
  ///        parts of the name.
  /// \param Separator separator used between all of the rest consecutive
  ///        parts of the name
  ///
  /// \return The composed name as a string.
  static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                           StringRef FirstSeparator,
                                           StringRef Separator);
1312 
  /// Gets (if a variable with the given name already exists) or creates an
  /// internal global variable with the specified Name. The created variable has
  /// linkage CommonLinkage by default and is initialized by null value.
  ///
  /// \param Ty Type of the global variable. If it exists already, the type
  ///        must be the same.
  /// \param Name Name of the variable.
  /// \param AddressSpace Defaults to 0. NOTE(review): presumably the address
  ///        space the global variable is created in -- confirm against the
  ///        implementation.
  Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
                                           unsigned AddressSpace = 0);
1321 
  /// Returns the corresponding lock object for the specified critical region
  /// name. If the lock object does not exist it is created, otherwise the
  /// reference to the existing copy is returned.
  ///
  /// \param CriticalName Name of the critical region.
  ///
  /// \return The lock object associated with \p CriticalName.
  Value *getOMPCriticalRegionLock(StringRef CriticalName);
1328 
  /// Callback type for Atomic Expression update
  /// ex:
  /// \code{.cpp}
  /// unsigned x = 0;
  /// #pragma omp atomic update
  /// x = Expr(x_old);  //Expr() is any legal operation
  /// \endcode
  ///
  /// Note that the alias itself is const-qualified, i.e. it names a
  /// `const function_ref<...>`.
  ///
  /// \param XOld the value of the atomic memory address to use for update
  /// \param IRB reference to the IRBuilder to use
  ///
  /// \returns Value to update X to.
  using AtomicUpdateCallbackTy =
      const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
1343 
1344 private:
  /// The kinds of OpenMP atomic operations (see the \c AK parameter of
  /// checkAndEmitFlushAfterAtomic).
  enum AtomicKind { Read, Write, Update, Capture, Compare };
1346 
  /// Determine whether to emit flush or not
  ///
  /// \param Loc    The insert and source location description.
  /// \param AO     The required atomic ordering
  /// \param AK     The OpenMP atomic operation kind used.
  ///
  /// \returns Whether a flush was emitted or not.
  bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
                                    AtomicOrdering AO, AtomicKind AK);
1356 
  /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
  /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
  /// Only Scalar data types.
  ///
  /// \param AllocaIP     The insertion point to be used for alloca
  ///                     instructions.
  /// \param X            The target atomic pointer to be updated.
  /// \param XElemTy      The element type of the atomic pointer.
  /// \param Expr         The value to update X with.
  /// \param AO           Atomic ordering of the generated atomic
  ///                     instructions.
  /// \param RMWOp        The binary operation used for update. If the
  ///                     operation is not supported by atomicRMW, or belongs
  ///                     to {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
  ///                     atomic will be generated.
  /// \param UpdateOp     Code generator for complex expressions that cannot be
  ///                     expressed through atomicrmw instruction.
  /// \param VolatileX    True if \a X is volatile.
  /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  ///                     update expression, false otherwise.
  ///                     (e.g. true for X = X BinOp Expr)
  ///
  /// \returns A pair of the old value of X before the update, and the value
  ///          used for the update.
  std::pair<Value *, Value *>
  emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                   AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                   AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                   bool IsXBinopExpr);
1386 
  /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
  ///
  /// \param Src1  First operand of the binary operation.
  /// \param Src2  Second operand of the binary operation.
  /// \param RMWOp The binary operation to emit.
  ///
  /// \return The instruction
  Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                                AtomicRMWInst::BinOp RMWOp);
1392 
1393 public:
  /// A struct to pack relevant information while generating atomic Ops.
  /// All members default to null / false.
  struct AtomicOpValue {
    /// The value operated on. NOTE(review): the createAtomic* documentation
    /// describes arguments of this type as pointers / memory addresses --
    /// confirm that \c Var is always a pointer.
    Value *Var = nullptr;
    /// Element type. NOTE(review): presumably the type of the value \c Var
    /// points to -- confirm.
    Type *ElemTy = nullptr;
    /// NOTE(review): presumably whether the value is to be treated as a signed
    /// integer -- confirm.
    bool IsSigned = false;
    /// NOTE(review): presumably whether accesses to \c Var are volatile --
    /// confirm.
    bool IsVolatile = false;
  };
1401 
  /// Emit atomic Read for : V = X --- Only Scalar data types.
  ///
  /// \param Loc The insert and source location description.
  /// \param X   The target pointer to be atomically read.
  /// \param V   Memory address where to store atomically read
  ///            value.
  /// \param AO  Atomic ordering of the generated atomic
  ///            instructions.
  ///
  /// \return Insertion point after generated atomic read IR.
  InsertPointTy createAtomicRead(const LocationDescription &Loc,
                                 AtomicOpValue &X, AtomicOpValue &V,
                                 AtomicOrdering AO);
1415 
  /// Emit atomic write for : X = Expr --- Only Scalar data types.
  ///
  /// \param Loc  The insert and source location description.
  /// \param X    The target pointer to be atomically written to.
  /// \param Expr The value to store.
  /// \param AO   Atomic ordering of the generated atomic
  ///             instructions.
  ///
  /// \return Insertion point after generated atomic Write IR.
  InsertPointTy createAtomicWrite(const LocationDescription &Loc,
                                  AtomicOpValue &X, Value *Expr,
                                  AtomicOrdering AO);
1428 
  /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
  /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
  /// Only Scalar data types.
  ///
  /// \param Loc          The insert and source location description.
  /// \param AllocaIP     The insertion point to be used for alloca
  ///                     instructions.
  /// \param X            The target atomic pointer to be updated.
  /// \param Expr         The value to update X with.
  /// \param AO           Atomic ordering of the generated atomic instructions.
  /// \param RMWOp        The binary operation used for update. If the
  ///                     operation is not supported by atomicRMW, or belongs
  ///                     to {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
  ///                     atomic will be generated.
  /// \param UpdateOp     Code generator for complex expressions that cannot be
  ///                     expressed through atomicrmw instruction.
  /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  ///                     update expression, false otherwise.
  ///                     (e.g. true for X = X BinOp Expr)
  ///
  /// \return Insertion point after generated atomic update IR.
  InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
                                   InsertPointTy AllocaIP, AtomicOpValue &X,
                                   Value *Expr, AtomicOrdering AO,
                                   AtomicRMWInst::BinOp RMWOp,
                                   AtomicUpdateCallbackTy &UpdateOp,
                                   bool IsXBinopExpr);
1455 
  /// Emit atomic update for constructs: --- Only Scalar data types
  /// V = X; X = X BinOp Expr ,
  /// X = X BinOp Expr; V = X,
  /// V = X; X = Expr BinOp X,
  /// X = Expr BinOp X; V = X,
  /// V = X; X = UpdateOp(X),
  /// X = UpdateOp(X); V = X,
  ///
  /// \param Loc        The insert and source location description.
  /// \param AllocaIP   The insertion point to be used for alloca instructions.
  /// \param X          The target atomic pointer to be updated
  /// \param V          Memory address where to store captured value
  /// \param Expr       The value to update X with.
  /// \param AO         Atomic ordering of the generated atomic instructions
  /// \param RMWOp      The binary operation used for update. If the
  ///                   operation is not supported by atomicRMW, or belongs to
  ///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
  ///                   atomic will be generated.
  /// \param UpdateOp   Code generator for complex expressions that cannot be
  ///                   expressed through atomicrmw instruction.
  /// \param UpdateExpr true if X is an in place update of the form
  ///                   X = X BinOp Expr or X = Expr BinOp X
  /// \param IsPostfixUpdate true if original value of 'x' must be stored in
  ///                        'v', not an updated one.
  /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
  ///                     update expression, false otherwise.
  ///                     (e.g. true for X = X BinOp Expr)
  ///
  /// \return Insertion point after generated atomic capture IR.
  InsertPointTy
  createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
                      AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
                      AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                      AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
                      bool IsPostfixUpdate, bool IsXBinopExpr);
1491 
  /// Emit atomic compare for constructs: --- Only scalar data types
  /// cond-expr-stmt:
  /// x = x ordop expr ? expr : x;
  /// x = expr ordop x ? expr : x;
  /// x = x == e ? d : x;
  /// x = e == x ? d : x; (this one is not in the spec)
  /// cond-update-stmt:
  /// if (x ordop expr) { x = expr; }
  /// if (expr ordop x) { x = expr; }
  /// if (x == e) { x = d; }
  /// if (e == x) { x = d; } (this one is not in the spec)
  /// conditional-update-capture-atomic:
  /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
  /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
  /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  ///                                         IsFailOnly=true)
  /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
  /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  ///                                                IsFailOnly=true)
  ///
  /// \param Loc          The insert and source location description.
  /// \param X            The target atomic pointer to be updated.
  /// \param V            Memory address where to store captured value (for
  ///                     compare capture only).
  /// \param R            Memory address where to store comparison result
  ///                     (for compare capture with '==' only).
  /// \param E            The expected value ('e') for forms that use an
  ///                     equality comparison or an expression ('expr') for
  ///                     forms that use 'ordop' (logically an atomic maximum or
  ///                     minimum).
  /// \param D            The desired value for forms that use an equality
  ///                     comparison. For forms that use 'ordop', it should be
  ///                     \p nullptr.
  /// \param AO           Atomic ordering of the generated atomic instructions.
  /// \param Op           Atomic compare operation. It can only be ==, <, or >.
  /// \param IsXBinopExpr True if the conditional statement is in the form where
  ///                     x is on LHS. It only matters for < or >.
  /// \param IsPostfixUpdate  True if original value of 'x' must be stored in
  ///                         'v', not an updated one (for compare capture
  ///                         only).
  /// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
  ///                     only when the comparison fails. This is only valid for
  ///                     the case the comparison is '=='.
  ///
  /// \return Insertion point after generated atomic compare IR.
  InsertPointTy
  createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
                      AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
                      AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
                      bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
1542 
  /// Create the control flow structure of a canonical OpenMP loop.
  ///
  /// The emitted loop will be disconnected, i.e. no edge to the loop's
  /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
  /// IRBuilder location is not preserved.
  ///
  /// \param DL        DebugLoc used for the instructions in the skeleton.
  /// \param TripCount Value to be used for the trip count.
  /// \param F         Function in which to insert the BasicBlocks.
  /// \param PreInsertBefore  Where to insert BBs that execute before the body,
  ///                         typically the body itself.
  /// \param PostInsertBefore Where to insert BBs that execute after the body.
  /// \param Name      Base name used to derive BB
  ///                  and instruction names. Defaults to an empty name.
  ///
  /// \returns The CanonicalLoopInfo that represents the emitted loop.
  CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
                                        Function *F,
                                        BasicBlock *PreInsertBefore,
                                        BasicBlock *PostInsertBefore,
                                        const Twine &Name = {});
1564 };
1565 
1566 /// Class to represented the control flow structure of an OpenMP canonical loop.
1567 ///
1568 /// The control-flow structure is standardized for easy consumption by
1569 /// directives associated with loops. For instance, the worksharing-loop
1570 /// construct may change this control flow such that each loop iteration is
1571 /// executed on only one thread. The constraints of a canonical loop in brief
1572 /// are:
1573 ///
1574 ///  * The number of loop iterations must have been computed before entering the
1575 ///    loop.
1576 ///
1577 ///  * Has an (unsigned) logical induction variable that starts at zero and
1578 ///    increments by one.
1579 ///
1580 ///  * The loop's CFG itself has no side-effects. The OpenMP specification
1581 ///    itself allows side-effects, but the order in which they happen, including
1582 ///    how often or whether at all, is unspecified. We expect that the frontend
1583 ///    will emit those side-effect instructions somewhere (e.g. before the loop)
1584 ///    such that the CanonicalLoopInfo itself can be side-effect free.
1585 ///
1586 /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
1587 /// execution of a loop body that satifies these constraints. It does NOT
1588 /// represent arbitrary SESE regions that happen to contain a loop. Do not use
1589 /// CanonicalLoopInfo for such purposes.
1590 ///
1591 /// The control flow can be described as follows:
1592 ///
1593 ///     Preheader
1594 ///        |
1595 ///  /-> Header
1596 ///  |     |
1597 ///  |    Cond---\
1598 ///  |     |     |
1599 ///  |    Body   |
1600 ///  |    | |    |
1601 ///  |   <...>   |
1602 ///  |    | |    |
1603 ///   \--Latch   |
1604 ///              |
1605 ///             Exit
1606 ///              |
1607 ///            After
1608 ///
1609 /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
1610 /// including) and end at AfterIP (at the After's first instruction, excluding).
1611 /// That is, instructions in the Preheader and After blocks (except the
1612 /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
1613 /// side-effects. Typically, the Preheader is used to compute the loop's trip
1614 /// count. The instructions from BodyIP (at the Body block's first instruction,
1615 /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
1616 /// control and thus can have side-effects. The body block is the single entry
1617 /// point into the loop body, which may contain arbitrary control flow as long
1618 /// as all control paths eventually branch to the Latch block.
1619 ///
1620 /// TODO: Consider adding another standardized BasicBlock between Body CFG and
1621 /// Latch to guarantee that there is only a single edge to the latch. It would
1622 /// make loop transformations easier to not needing to consider multiple
1623 /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
1624 /// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that
1625 /// executes after each body iteration.
1626 ///
1627 /// There must be no loop-carried dependencies through llvm::Values. This is
1628 /// equivalant to that the Latch has no PHINode and the Header's only PHINode is
1629 /// for the induction variable.
1630 ///
1631 /// All code in Header, Cond, Latch and Exit (plus the terminator of the
1632 /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
1633 /// by assertOK(). They are expected to not be modified unless explicitly
1634 /// modifying the CanonicalLoopInfo through a methods that applies a OpenMP
1635 /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
1636 /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
1637 /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
1638 /// anymore as its underlying control flow may not exist anymore.
1639 /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
1640 /// may also return a new CanonicalLoopInfo that can be passed to other
1641 /// loop-associated construct implementing methods. These loop-transforming
1642 /// methods may either create a new CanonicalLoopInfo usually using
1643 /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
1644 /// modify one of the input CanonicalLoopInfo and return it as representing the
1645 /// modified loop. What is done is an implementation detail of
1646 /// transformation-implementing method and callers should always assume that the
1647 /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
1648 /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
1649 /// created by createCanonicalLoop, such that transforming methods do not have
1650 /// to special case where the CanonicalLoopInfo originated from.
1651 ///
1652 /// Generally, methods consuming CanonicalLoopInfo do not need an
1653 /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
1654 /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
1655 /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
1656 /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
1657 /// any InsertPoint in the Preheader, After or Block can still be used after
1658 /// calling such a method.
1659 ///
1660 /// TODO: Provide mechanisms for exception handling and cancellation points.
1661 ///
1662 /// Defined outside OpenMPIRBuilder because nested classes cannot be
1663 /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
1664 class CanonicalLoopInfo {
1665   friend class OpenMPIRBuilder;
1666 
1667 private:
1668   BasicBlock *Header = nullptr;
1669   BasicBlock *Cond = nullptr;
1670   BasicBlock *Latch = nullptr;
1671   BasicBlock *Exit = nullptr;
1672 
1673   /// Add the control blocks of this loop to \p BBs.
1674   ///
1675   /// This does not include any block from the body, including the one returned
1676   /// by getBody().
1677   ///
1678   /// FIXME: This currently includes the Preheader and After blocks even though
1679   /// their content is (mostly) not under CanonicalLoopInfo's control.
1680   /// Re-evaluated whether this makes sense.
1681   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
1682 
1683   /// Sets the number of loop iterations to the given value. This value must be
1684   /// valid in the condition block (i.e., defined in the preheader) and is
1685   /// interpreted as an unsigned integer.
1686   void setTripCount(Value *TripCount);
1687 
1688   /// Replace all uses of the canonical induction variable in the loop body with
1689   /// a new one.
1690   ///
1691   /// The intended use case is to update the induction variable for an updated
1692   /// iteration space such that it can stay normalized in the 0...tripcount-1
1693   /// range.
1694   ///
1695   /// The \p Updater is called with the (presumable updated) current normalized
1696   /// induction variable and is expected to return the value that uses of the
1697   /// pre-updated induction values should use instead, typically dependent on
1698   /// the new induction variable. This is a lambda (instead of e.g. just passing
1699   /// the new value) to be able to distinguish the uses of the pre-updated
1700   /// induction variable and uses of the induction varible to compute the
1701   /// updated induction variable value.
1702   void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
1703 
1704 public:
1705   /// Returns whether this object currently represents the IR of a loop. If
1706   /// returning false, it may have been consumed by a loop transformation or not
1707   /// been intialized. Do not use in this case;
1708   bool isValid() const { return Header; }
1709 
1710   /// The preheader ensures that there is only a single edge entering the loop.
1711   /// Code that must be execute before any loop iteration can be emitted here,
1712   /// such as computing the loop trip count and begin lifetime markers. Code in
1713   /// the preheader is not considered part of the canonical loop.
1714   BasicBlock *getPreheader() const;
1715 
1716   /// The header is the entry for each iteration. In the canonical control flow,
1717   /// it only contains the PHINode for the induction variable.
1718   BasicBlock *getHeader() const {
1719     assert(isValid() && "Requires a valid canonical loop");
1720     return Header;
1721   }
1722 
1723   /// The condition block computes whether there is another loop iteration. If
1724   /// yes, branches to the body; otherwise to the exit block.
1725   BasicBlock *getCond() const {
1726     assert(isValid() && "Requires a valid canonical loop");
1727     return Cond;
1728   }
1729 
1730   /// The body block is the single entry for a loop iteration and not controlled
1731   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
1732   /// eventually branch to the \p Latch block.
1733   BasicBlock *getBody() const {
1734     assert(isValid() && "Requires a valid canonical loop");
1735     return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
1736   }
1737 
1738   /// Reaching the latch indicates the end of the loop body code. In the
1739   /// canonical control flow, it only contains the increment of the induction
1740   /// variable.
1741   BasicBlock *getLatch() const {
1742     assert(isValid() && "Requires a valid canonical loop");
1743     return Latch;
1744   }
1745 
1746   /// Reaching the exit indicates no more iterations are being executed.
1747   BasicBlock *getExit() const {
1748     assert(isValid() && "Requires a valid canonical loop");
1749     return Exit;
1750   }
1751 
1752   /// The after block is intended for clean-up code such as lifetime end
1753   /// markers. It is separate from the exit block to ensure, analogous to the
1754   /// preheader, it having just a single entry edge and being free from PHI
1755   /// nodes should there be multiple loop exits (such as from break
1756   /// statements/cancellations).
1757   BasicBlock *getAfter() const {
1758     assert(isValid() && "Requires a valid canonical loop");
1759     return Exit->getSingleSuccessor();
1760   }
1761 
1762   /// Returns the llvm::Value containing the number of loop iterations. It must
1763   /// be valid in the preheader and always interpreted as an unsigned integer of
1764   /// any bit-width.
1765   Value *getTripCount() const {
1766     assert(isValid() && "Requires a valid canonical loop");
1767     Instruction *CmpI = &Cond->front();
1768     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1769     return CmpI->getOperand(1);
1770   }
1771 
1772   /// Returns the instruction representing the current logical induction
1773   /// variable. Always unsigned, always starting at 0 with an increment of one.
1774   Instruction *getIndVar() const {
1775     assert(isValid() && "Requires a valid canonical loop");
1776     Instruction *IndVarPHI = &Header->front();
1777     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
1778     return IndVarPHI;
1779   }
1780 
1781   /// Return the type of the induction variable (and the trip count).
1782   Type *getIndVarType() const {
1783     assert(isValid() && "Requires a valid canonical loop");
1784     return getIndVar()->getType();
1785   }
1786 
1787   /// Return the insertion point for user code before the loop.
1788   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
1789     assert(isValid() && "Requires a valid canonical loop");
1790     BasicBlock *Preheader = getPreheader();
1791     return {Preheader, std::prev(Preheader->end())};
1792   };
1793 
1794   /// Return the insertion point for user code in the body.
1795   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
1796     assert(isValid() && "Requires a valid canonical loop");
1797     BasicBlock *Body = getBody();
1798     return {Body, Body->begin()};
1799   };
1800 
1801   /// Return the insertion point for user code after the loop.
1802   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
1803     assert(isValid() && "Requires a valid canonical loop");
1804     BasicBlock *After = getAfter();
1805     return {After, After->begin()};
1806   };
1807 
1808   Function *getFunction() const {
1809     assert(isValid() && "Requires a valid canonical loop");
1810     return Header->getParent();
1811   }
1812 
1813   /// Consistency self-check.
1814   void assertOK() const;
1815 
1816   /// Invalidate this loop. That is, the underlying IR does not fulfill the
1817   /// requirements of an OpenMP canonical loop anymore.
1818   void invalidate();
1819 };
1820 
1821 } // end namespace llvm
1822 
1823 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
1824